[llvm] [RISCV] Add branch folding before branch relaxation (PR #134760)
Mikhail R. Gadelha via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 10 11:04:33 PDT 2025
https://github.com/mikhailramalho updated https://github.com/llvm/llvm-project/pull/134760
>From 3d2fee492a17dc96f6d0f59ddaae7545e90a73bd Mon Sep 17 00:00:00 2001
From: "Mikhail R. Gadelha" <mikhail at igalia.com>
Date: Thu, 27 Mar 2025 20:11:16 -0300
Subject: [PATCH 1/6] [RISCV] Add branch folding before branch relaxation
This is a follow-up patch to PR #133256.
This patch adds the branch folding pass after the newly added late branch
optimization pass for RISC-V, which reduces code size in all SPEC
benchmarks (except lbm).
The improvements are: 500.perlbench_r (-3.37%), 544.nab_r (-3.06%),
557.xz_r (-2.82%), 523.xalancbmk_r (-2.64%), 520.omnetpp_r (-2.34%),
531.deepsjeng_r (-2.27%), 502.gcc_r (-2.19%), 526.blender_r (-2.11%),
538.imagick_r (-2.03%), 505.mcf_r (-1.82%), 541.leela_r (-1.74%),
511.povray_r (-1.62%), 510.parest_r (-1.62%), 508.namd_r (-1.57%),
525.x264_r (-1.47%).
Geo mean is -2.07%.
Some caveats:
* In #131728 I mentioned a 7% improvement in the execution time of xz, but
that's no longer the case. I went back and tried to reproduce the result
with the code from #131728 and couldn't. The results from that PR and this
one are now the same: an overall code-size reduction but no execution-time
improvement.
* The root cause of the large reduction is not yet clear to me. I'm still
investigating it.
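
For reference, a minimal sketch of the resulting pre-emit pipeline after this
change (paraphrased from the RISCVTargetMachine.cpp hunk below, not the exact
upstream source):

  void RISCVPassConfig::addPreEmitPass() {
    // Copy propagation and the RISC-V late branch optimization run first.
    addPass(createMachineCopyPropagationPass(true));
    if (TM->getOptLevel() >= CodeGenOptLevel::Default)
      addPass(createRISCVLateBranchOptPass());
    // New in this patch: run the generic branch folder to clean up the CFG
    // (merge/remove blocks and straighten branches) before branch relaxation
    // fixes up out-of-range branches.
    addPass(&BranchFolderPassID);
    addPass(&BranchRelaxationPassID);
    addPass(createRISCVMakeCompressibleOptPass());
  }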
---
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 1 +
.../CodeGen/RISCV/GlobalISel/rotl-rotr.ll | 831 ++--
.../CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll | 37 +-
llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll | 20 -
llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll | 267 +-
...lar-shift-by-byte-multiple-legalization.ll | 2787 ++++++------
llvm/test/CodeGen/RISCV/O0-pipeline.ll | 7 +-
llvm/test/CodeGen/RISCV/O3-pipeline.ll | 7 +-
llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll | 40 +-
llvm/test/CodeGen/RISCV/atomic-signext.ll | 108 +-
llvm/test/CodeGen/RISCV/bfloat-br-fcmp.ll | 16 -
llvm/test/CodeGen/RISCV/bittest.ll | 462 +-
llvm/test/CodeGen/RISCV/branch_zero.ll | 18 +-
llvm/test/CodeGen/RISCV/cmp-bool.ll | 16 +-
llvm/test/CodeGen/RISCV/copyprop.ll | 15 +-
llvm/test/CodeGen/RISCV/csr-first-use-cost.ll | 54 +-
llvm/test/CodeGen/RISCV/double-br-fcmp.ll | 32 -
.../CodeGen/RISCV/double-maximum-minimum.ll | 63 +-
llvm/test/CodeGen/RISCV/float-br-fcmp.ll | 32 -
.../CodeGen/RISCV/float-maximum-minimum.ll | 56 +-
llvm/test/CodeGen/RISCV/forced-atomics.ll | 25 +-
llvm/test/CodeGen/RISCV/fpclamptosat.ll | 368 +-
llvm/test/CodeGen/RISCV/frame-info.ll | 80 +-
llvm/test/CodeGen/RISCV/half-br-fcmp.ll | 64 -
.../CodeGen/RISCV/half-maximum-minimum.ll | 28 +-
llvm/test/CodeGen/RISCV/machine-pipeliner.ll | 18 +-
.../RISCV/machine-sink-load-immediate.ll | 66 +-
.../RISCV/reduce-unnecessary-extension.ll | 16 +-
.../test/CodeGen/RISCV/riscv-tail-dup-size.ll | 11 +-
llvm/test/CodeGen/RISCV/rv32zbb.ll | 14 +-
llvm/test/CodeGen/RISCV/rvv/copyprop.mir | 7 +-
llvm/test/CodeGen/RISCV/rvv/expandload.ll | 3988 +++++------------
.../RISCV/rvv/fixed-vectors-masked-gather.ll | 3182 ++++++-------
.../RISCV/rvv/fixed-vectors-masked-scatter.ll | 2246 ++++------
.../RISCV/rvv/fixed-vectors-unaligned.ll | 47 +-
.../CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 612 ++-
llvm/test/CodeGen/RISCV/rvv/pr93587.ll | 10 -
.../CodeGen/RISCV/rvv/vcpop-shl-zext-opt.ll | 32 +-
.../RISCV/rvv/vsetvli-insert-crossbb.ll | 44 +-
.../RISCV/rvv/vxrm-insert-out-of-loop.ll | 66 +-
llvm/test/CodeGen/RISCV/sadd_sat.ll | 42 +-
llvm/test/CodeGen/RISCV/sadd_sat_plus.ll | 42 +-
llvm/test/CodeGen/RISCV/setcc-logic.ll | 200 +-
llvm/test/CodeGen/RISCV/sext-zext-trunc.ll | 76 +-
llvm/test/CodeGen/RISCV/shifts.ll | 5 +-
llvm/test/CodeGen/RISCV/simplify-condbr.ll | 31 +-
llvm/test/CodeGen/RISCV/ssub_sat.ll | 42 +-
llvm/test/CodeGen/RISCV/ssub_sat_plus.ll | 42 +-
llvm/test/CodeGen/RISCV/xcvbi.ll | 6 +-
49 files changed, 6412 insertions(+), 9867 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index ec8ba3322a6e1..4e7c154c1a3b5 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -568,6 +568,7 @@ void RISCVPassConfig::addPreEmitPass() {
addPass(createMachineCopyPropagationPass(true));
if (TM->getOptLevel() >= CodeGenOptLevel::Default)
addPass(createRISCVLateBranchOptPass());
+ addPass(&BranchFolderPassID);
addPass(&BranchRelaxationPassID);
addPass(createRISCVMakeCompressibleOptPass());
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll
index 8a786fc9993d2..da8678f9a9916 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll
@@ -296,44 +296,43 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: bltu a5, a4, .LBB3_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srl a6, a1, a5
-; RV32I-NEXT: mv a3, a0
-; RV32I-NEXT: bnez a5, .LBB3_3
-; RV32I-NEXT: j .LBB3_4
+; RV32I-NEXT: j .LBB3_3
; RV32I-NEXT: .LBB3_2:
; RV32I-NEXT: srl a3, a0, a2
; RV32I-NEXT: neg a6, a5
; RV32I-NEXT: sll a6, a1, a6
; RV32I-NEXT: or a6, a3, a6
-; RV32I-NEXT: mv a3, a0
-; RV32I-NEXT: beqz a5, .LBB3_4
; RV32I-NEXT: .LBB3_3:
+; RV32I-NEXT: mv a3, a0
+; RV32I-NEXT: beqz a5, .LBB3_5
+; RV32I-NEXT: # %bb.4:
; RV32I-NEXT: mv a3, a6
-; RV32I-NEXT: .LBB3_4:
+; RV32I-NEXT: .LBB3_5:
; RV32I-NEXT: neg a6, a2
-; RV32I-NEXT: bltu a5, a4, .LBB3_7
-; RV32I-NEXT: # %bb.5:
+; RV32I-NEXT: bltu a5, a4, .LBB3_9
+; RV32I-NEXT: # %bb.6:
; RV32I-NEXT: li a2, 0
+; RV32I-NEXT: .LBB3_7:
; RV32I-NEXT: andi a5, a6, 63
-; RV32I-NEXT: bgeu a5, a4, .LBB3_8
-; RV32I-NEXT: .LBB3_6:
+; RV32I-NEXT: bgeu a5, a4, .LBB3_10
+; RV32I-NEXT: # %bb.8:
; RV32I-NEXT: sll a4, a0, a6
; RV32I-NEXT: neg a7, a5
; RV32I-NEXT: srl a0, a0, a7
; RV32I-NEXT: sll a6, a1, a6
; RV32I-NEXT: or a0, a0, a6
-; RV32I-NEXT: bnez a5, .LBB3_9
-; RV32I-NEXT: j .LBB3_10
-; RV32I-NEXT: .LBB3_7:
+; RV32I-NEXT: bnez a5, .LBB3_11
+; RV32I-NEXT: j .LBB3_12
+; RV32I-NEXT: .LBB3_9:
; RV32I-NEXT: srl a2, a1, a2
-; RV32I-NEXT: andi a5, a6, 63
-; RV32I-NEXT: bltu a5, a4, .LBB3_6
-; RV32I-NEXT: .LBB3_8:
+; RV32I-NEXT: j .LBB3_7
+; RV32I-NEXT: .LBB3_10:
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: sll a0, a0, a5
-; RV32I-NEXT: beqz a5, .LBB3_10
-; RV32I-NEXT: .LBB3_9:
+; RV32I-NEXT: beqz a5, .LBB3_12
+; RV32I-NEXT: .LBB3_11:
; RV32I-NEXT: mv a1, a0
-; RV32I-NEXT: .LBB3_10:
+; RV32I-NEXT: .LBB3_12:
; RV32I-NEXT: or a0, a3, a4
; RV32I-NEXT: or a1, a2, a1
; RV32I-NEXT: ret
@@ -353,44 +352,43 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind {
; RV32ZBB-NEXT: bltu a5, a4, .LBB3_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: srl a6, a1, a5
-; RV32ZBB-NEXT: mv a3, a0
-; RV32ZBB-NEXT: bnez a5, .LBB3_3
-; RV32ZBB-NEXT: j .LBB3_4
+; RV32ZBB-NEXT: j .LBB3_3
; RV32ZBB-NEXT: .LBB3_2:
; RV32ZBB-NEXT: srl a3, a0, a2
; RV32ZBB-NEXT: neg a6, a5
; RV32ZBB-NEXT: sll a6, a1, a6
; RV32ZBB-NEXT: or a6, a3, a6
-; RV32ZBB-NEXT: mv a3, a0
-; RV32ZBB-NEXT: beqz a5, .LBB3_4
; RV32ZBB-NEXT: .LBB3_3:
+; RV32ZBB-NEXT: mv a3, a0
+; RV32ZBB-NEXT: beqz a5, .LBB3_5
+; RV32ZBB-NEXT: # %bb.4:
; RV32ZBB-NEXT: mv a3, a6
-; RV32ZBB-NEXT: .LBB3_4:
+; RV32ZBB-NEXT: .LBB3_5:
; RV32ZBB-NEXT: neg a6, a2
-; RV32ZBB-NEXT: bltu a5, a4, .LBB3_7
-; RV32ZBB-NEXT: # %bb.5:
+; RV32ZBB-NEXT: bltu a5, a4, .LBB3_9
+; RV32ZBB-NEXT: # %bb.6:
; RV32ZBB-NEXT: li a2, 0
+; RV32ZBB-NEXT: .LBB3_7:
; RV32ZBB-NEXT: andi a5, a6, 63
-; RV32ZBB-NEXT: bgeu a5, a4, .LBB3_8
-; RV32ZBB-NEXT: .LBB3_6:
+; RV32ZBB-NEXT: bgeu a5, a4, .LBB3_10
+; RV32ZBB-NEXT: # %bb.8:
; RV32ZBB-NEXT: sll a4, a0, a6
; RV32ZBB-NEXT: neg a7, a5
; RV32ZBB-NEXT: srl a0, a0, a7
; RV32ZBB-NEXT: sll a6, a1, a6
; RV32ZBB-NEXT: or a0, a0, a6
-; RV32ZBB-NEXT: bnez a5, .LBB3_9
-; RV32ZBB-NEXT: j .LBB3_10
-; RV32ZBB-NEXT: .LBB3_7:
+; RV32ZBB-NEXT: bnez a5, .LBB3_11
+; RV32ZBB-NEXT: j .LBB3_12
+; RV32ZBB-NEXT: .LBB3_9:
; RV32ZBB-NEXT: srl a2, a1, a2
-; RV32ZBB-NEXT: andi a5, a6, 63
-; RV32ZBB-NEXT: bltu a5, a4, .LBB3_6
-; RV32ZBB-NEXT: .LBB3_8:
+; RV32ZBB-NEXT: j .LBB3_7
+; RV32ZBB-NEXT: .LBB3_10:
; RV32ZBB-NEXT: li a4, 0
; RV32ZBB-NEXT: sll a0, a0, a5
-; RV32ZBB-NEXT: beqz a5, .LBB3_10
-; RV32ZBB-NEXT: .LBB3_9:
+; RV32ZBB-NEXT: beqz a5, .LBB3_12
+; RV32ZBB-NEXT: .LBB3_11:
; RV32ZBB-NEXT: mv a1, a0
-; RV32ZBB-NEXT: .LBB3_10:
+; RV32ZBB-NEXT: .LBB3_12:
; RV32ZBB-NEXT: or a0, a3, a4
; RV32ZBB-NEXT: or a1, a2, a1
; RV32ZBB-NEXT: ret
@@ -407,44 +405,43 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind {
; RV32XTHEADBB-NEXT: bltu a5, a4, .LBB3_2
; RV32XTHEADBB-NEXT: # %bb.1:
; RV32XTHEADBB-NEXT: srl a6, a1, a5
-; RV32XTHEADBB-NEXT: mv a3, a0
-; RV32XTHEADBB-NEXT: bnez a5, .LBB3_3
-; RV32XTHEADBB-NEXT: j .LBB3_4
+; RV32XTHEADBB-NEXT: j .LBB3_3
; RV32XTHEADBB-NEXT: .LBB3_2:
; RV32XTHEADBB-NEXT: srl a3, a0, a2
; RV32XTHEADBB-NEXT: neg a6, a5
; RV32XTHEADBB-NEXT: sll a6, a1, a6
; RV32XTHEADBB-NEXT: or a6, a3, a6
-; RV32XTHEADBB-NEXT: mv a3, a0
-; RV32XTHEADBB-NEXT: beqz a5, .LBB3_4
; RV32XTHEADBB-NEXT: .LBB3_3:
+; RV32XTHEADBB-NEXT: mv a3, a0
+; RV32XTHEADBB-NEXT: beqz a5, .LBB3_5
+; RV32XTHEADBB-NEXT: # %bb.4:
; RV32XTHEADBB-NEXT: mv a3, a6
-; RV32XTHEADBB-NEXT: .LBB3_4:
+; RV32XTHEADBB-NEXT: .LBB3_5:
; RV32XTHEADBB-NEXT: neg a6, a2
-; RV32XTHEADBB-NEXT: bltu a5, a4, .LBB3_7
-; RV32XTHEADBB-NEXT: # %bb.5:
+; RV32XTHEADBB-NEXT: bltu a5, a4, .LBB3_9
+; RV32XTHEADBB-NEXT: # %bb.6:
; RV32XTHEADBB-NEXT: li a2, 0
+; RV32XTHEADBB-NEXT: .LBB3_7:
; RV32XTHEADBB-NEXT: andi a5, a6, 63
-; RV32XTHEADBB-NEXT: bgeu a5, a4, .LBB3_8
-; RV32XTHEADBB-NEXT: .LBB3_6:
+; RV32XTHEADBB-NEXT: bgeu a5, a4, .LBB3_10
+; RV32XTHEADBB-NEXT: # %bb.8:
; RV32XTHEADBB-NEXT: sll a4, a0, a6
; RV32XTHEADBB-NEXT: neg a7, a5
; RV32XTHEADBB-NEXT: srl a0, a0, a7
; RV32XTHEADBB-NEXT: sll a6, a1, a6
; RV32XTHEADBB-NEXT: or a0, a0, a6
-; RV32XTHEADBB-NEXT: bnez a5, .LBB3_9
-; RV32XTHEADBB-NEXT: j .LBB3_10
-; RV32XTHEADBB-NEXT: .LBB3_7:
+; RV32XTHEADBB-NEXT: bnez a5, .LBB3_11
+; RV32XTHEADBB-NEXT: j .LBB3_12
+; RV32XTHEADBB-NEXT: .LBB3_9:
; RV32XTHEADBB-NEXT: srl a2, a1, a2
-; RV32XTHEADBB-NEXT: andi a5, a6, 63
-; RV32XTHEADBB-NEXT: bltu a5, a4, .LBB3_6
-; RV32XTHEADBB-NEXT: .LBB3_8:
+; RV32XTHEADBB-NEXT: j .LBB3_7
+; RV32XTHEADBB-NEXT: .LBB3_10:
; RV32XTHEADBB-NEXT: li a4, 0
; RV32XTHEADBB-NEXT: sll a0, a0, a5
-; RV32XTHEADBB-NEXT: beqz a5, .LBB3_10
-; RV32XTHEADBB-NEXT: .LBB3_9:
+; RV32XTHEADBB-NEXT: beqz a5, .LBB3_12
+; RV32XTHEADBB-NEXT: .LBB3_11:
; RV32XTHEADBB-NEXT: mv a1, a0
-; RV32XTHEADBB-NEXT: .LBB3_10:
+; RV32XTHEADBB-NEXT: .LBB3_12:
; RV32XTHEADBB-NEXT: or a0, a3, a4
; RV32XTHEADBB-NEXT: or a1, a2, a1
; RV32XTHEADBB-NEXT: ret
@@ -961,43 +958,42 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: sll a7, a0, a6
-; RV32I-NEXT: mv a5, a1
-; RV32I-NEXT: bnez a6, .LBB11_3
-; RV32I-NEXT: j .LBB11_4
+; RV32I-NEXT: j .LBB11_3
; RV32I-NEXT: .LBB11_2:
; RV32I-NEXT: sll a3, a0, a2
; RV32I-NEXT: neg a5, a6
; RV32I-NEXT: srl a5, a0, a5
; RV32I-NEXT: sll a7, a1, a2
; RV32I-NEXT: or a7, a5, a7
-; RV32I-NEXT: mv a5, a1
-; RV32I-NEXT: beqz a6, .LBB11_4
; RV32I-NEXT: .LBB11_3:
+; RV32I-NEXT: mv a5, a1
+; RV32I-NEXT: beqz a6, .LBB11_5
+; RV32I-NEXT: # %bb.4:
; RV32I-NEXT: mv a5, a7
-; RV32I-NEXT: .LBB11_4:
+; RV32I-NEXT: .LBB11_5:
; RV32I-NEXT: neg a2, a2
; RV32I-NEXT: andi a6, a2, 63
-; RV32I-NEXT: bltu a6, a4, .LBB11_6
-; RV32I-NEXT: # %bb.5:
+; RV32I-NEXT: bltu a6, a4, .LBB11_7
+; RV32I-NEXT: # %bb.6:
; RV32I-NEXT: srl a7, a1, a6
-; RV32I-NEXT: bnez a6, .LBB11_7
-; RV32I-NEXT: j .LBB11_8
-; RV32I-NEXT: .LBB11_6:
+; RV32I-NEXT: bnez a6, .LBB11_8
+; RV32I-NEXT: j .LBB11_9
+; RV32I-NEXT: .LBB11_7:
; RV32I-NEXT: srl a7, a0, a2
; RV32I-NEXT: neg t0, a6
; RV32I-NEXT: sll t0, a1, t0
; RV32I-NEXT: or a7, a7, t0
-; RV32I-NEXT: beqz a6, .LBB11_8
-; RV32I-NEXT: .LBB11_7:
-; RV32I-NEXT: mv a0, a7
+; RV32I-NEXT: beqz a6, .LBB11_9
; RV32I-NEXT: .LBB11_8:
-; RV32I-NEXT: bltu a6, a4, .LBB11_10
-; RV32I-NEXT: # %bb.9:
+; RV32I-NEXT: mv a0, a7
+; RV32I-NEXT: .LBB11_9:
+; RV32I-NEXT: bltu a6, a4, .LBB11_11
+; RV32I-NEXT: # %bb.10:
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: j .LBB11_11
-; RV32I-NEXT: .LBB11_10:
-; RV32I-NEXT: srl a1, a1, a2
+; RV32I-NEXT: j .LBB11_12
; RV32I-NEXT: .LBB11_11:
+; RV32I-NEXT: srl a1, a1, a2
+; RV32I-NEXT: .LBB11_12:
; RV32I-NEXT: or a0, a3, a0
; RV32I-NEXT: or a1, a5, a1
; RV32I-NEXT: ret
@@ -1018,43 +1014,42 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: li a3, 0
; RV32ZBB-NEXT: sll a7, a0, a6
-; RV32ZBB-NEXT: mv a5, a1
-; RV32ZBB-NEXT: bnez a6, .LBB11_3
-; RV32ZBB-NEXT: j .LBB11_4
+; RV32ZBB-NEXT: j .LBB11_3
; RV32ZBB-NEXT: .LBB11_2:
; RV32ZBB-NEXT: sll a3, a0, a2
; RV32ZBB-NEXT: neg a5, a6
; RV32ZBB-NEXT: srl a5, a0, a5
; RV32ZBB-NEXT: sll a7, a1, a2
; RV32ZBB-NEXT: or a7, a5, a7
-; RV32ZBB-NEXT: mv a5, a1
-; RV32ZBB-NEXT: beqz a6, .LBB11_4
; RV32ZBB-NEXT: .LBB11_3:
+; RV32ZBB-NEXT: mv a5, a1
+; RV32ZBB-NEXT: beqz a6, .LBB11_5
+; RV32ZBB-NEXT: # %bb.4:
; RV32ZBB-NEXT: mv a5, a7
-; RV32ZBB-NEXT: .LBB11_4:
+; RV32ZBB-NEXT: .LBB11_5:
; RV32ZBB-NEXT: neg a2, a2
; RV32ZBB-NEXT: andi a6, a2, 63
-; RV32ZBB-NEXT: bltu a6, a4, .LBB11_6
-; RV32ZBB-NEXT: # %bb.5:
+; RV32ZBB-NEXT: bltu a6, a4, .LBB11_7
+; RV32ZBB-NEXT: # %bb.6:
; RV32ZBB-NEXT: srl a7, a1, a6
-; RV32ZBB-NEXT: bnez a6, .LBB11_7
-; RV32ZBB-NEXT: j .LBB11_8
-; RV32ZBB-NEXT: .LBB11_6:
+; RV32ZBB-NEXT: bnez a6, .LBB11_8
+; RV32ZBB-NEXT: j .LBB11_9
+; RV32ZBB-NEXT: .LBB11_7:
; RV32ZBB-NEXT: srl a7, a0, a2
; RV32ZBB-NEXT: neg t0, a6
; RV32ZBB-NEXT: sll t0, a1, t0
; RV32ZBB-NEXT: or a7, a7, t0
-; RV32ZBB-NEXT: beqz a6, .LBB11_8
-; RV32ZBB-NEXT: .LBB11_7:
-; RV32ZBB-NEXT: mv a0, a7
+; RV32ZBB-NEXT: beqz a6, .LBB11_9
; RV32ZBB-NEXT: .LBB11_8:
-; RV32ZBB-NEXT: bltu a6, a4, .LBB11_10
-; RV32ZBB-NEXT: # %bb.9:
+; RV32ZBB-NEXT: mv a0, a7
+; RV32ZBB-NEXT: .LBB11_9:
+; RV32ZBB-NEXT: bltu a6, a4, .LBB11_11
+; RV32ZBB-NEXT: # %bb.10:
; RV32ZBB-NEXT: li a1, 0
-; RV32ZBB-NEXT: j .LBB11_11
-; RV32ZBB-NEXT: .LBB11_10:
-; RV32ZBB-NEXT: srl a1, a1, a2
+; RV32ZBB-NEXT: j .LBB11_12
; RV32ZBB-NEXT: .LBB11_11:
+; RV32ZBB-NEXT: srl a1, a1, a2
+; RV32ZBB-NEXT: .LBB11_12:
; RV32ZBB-NEXT: or a0, a3, a0
; RV32ZBB-NEXT: or a1, a5, a1
; RV32ZBB-NEXT: ret
@@ -1075,43 +1070,42 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV32XTHEADBB-NEXT: # %bb.1:
; RV32XTHEADBB-NEXT: li a3, 0
; RV32XTHEADBB-NEXT: sll a7, a0, a6
-; RV32XTHEADBB-NEXT: mv a5, a1
-; RV32XTHEADBB-NEXT: bnez a6, .LBB11_3
-; RV32XTHEADBB-NEXT: j .LBB11_4
+; RV32XTHEADBB-NEXT: j .LBB11_3
; RV32XTHEADBB-NEXT: .LBB11_2:
; RV32XTHEADBB-NEXT: sll a3, a0, a2
; RV32XTHEADBB-NEXT: neg a5, a6
; RV32XTHEADBB-NEXT: srl a5, a0, a5
; RV32XTHEADBB-NEXT: sll a7, a1, a2
; RV32XTHEADBB-NEXT: or a7, a5, a7
-; RV32XTHEADBB-NEXT: mv a5, a1
-; RV32XTHEADBB-NEXT: beqz a6, .LBB11_4
; RV32XTHEADBB-NEXT: .LBB11_3:
+; RV32XTHEADBB-NEXT: mv a5, a1
+; RV32XTHEADBB-NEXT: beqz a6, .LBB11_5
+; RV32XTHEADBB-NEXT: # %bb.4:
; RV32XTHEADBB-NEXT: mv a5, a7
-; RV32XTHEADBB-NEXT: .LBB11_4:
+; RV32XTHEADBB-NEXT: .LBB11_5:
; RV32XTHEADBB-NEXT: neg a2, a2
; RV32XTHEADBB-NEXT: andi a6, a2, 63
-; RV32XTHEADBB-NEXT: bltu a6, a4, .LBB11_6
-; RV32XTHEADBB-NEXT: # %bb.5:
+; RV32XTHEADBB-NEXT: bltu a6, a4, .LBB11_7
+; RV32XTHEADBB-NEXT: # %bb.6:
; RV32XTHEADBB-NEXT: srl a7, a1, a6
-; RV32XTHEADBB-NEXT: bnez a6, .LBB11_7
-; RV32XTHEADBB-NEXT: j .LBB11_8
-; RV32XTHEADBB-NEXT: .LBB11_6:
+; RV32XTHEADBB-NEXT: bnez a6, .LBB11_8
+; RV32XTHEADBB-NEXT: j .LBB11_9
+; RV32XTHEADBB-NEXT: .LBB11_7:
; RV32XTHEADBB-NEXT: srl a7, a0, a2
; RV32XTHEADBB-NEXT: neg t0, a6
; RV32XTHEADBB-NEXT: sll t0, a1, t0
; RV32XTHEADBB-NEXT: or a7, a7, t0
-; RV32XTHEADBB-NEXT: beqz a6, .LBB11_8
-; RV32XTHEADBB-NEXT: .LBB11_7:
-; RV32XTHEADBB-NEXT: mv a0, a7
+; RV32XTHEADBB-NEXT: beqz a6, .LBB11_9
; RV32XTHEADBB-NEXT: .LBB11_8:
-; RV32XTHEADBB-NEXT: bltu a6, a4, .LBB11_10
-; RV32XTHEADBB-NEXT: # %bb.9:
+; RV32XTHEADBB-NEXT: mv a0, a7
+; RV32XTHEADBB-NEXT: .LBB11_9:
+; RV32XTHEADBB-NEXT: bltu a6, a4, .LBB11_11
+; RV32XTHEADBB-NEXT: # %bb.10:
; RV32XTHEADBB-NEXT: li a1, 0
-; RV32XTHEADBB-NEXT: j .LBB11_11
-; RV32XTHEADBB-NEXT: .LBB11_10:
-; RV32XTHEADBB-NEXT: srl a1, a1, a2
+; RV32XTHEADBB-NEXT: j .LBB11_12
; RV32XTHEADBB-NEXT: .LBB11_11:
+; RV32XTHEADBB-NEXT: srl a1, a1, a2
+; RV32XTHEADBB-NEXT: .LBB11_12:
; RV32XTHEADBB-NEXT: or a0, a3, a0
; RV32XTHEADBB-NEXT: or a1, a5, a1
; RV32XTHEADBB-NEXT: ret
@@ -1406,44 +1400,43 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: bltu a4, a5, .LBB14_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srl a6, a1, a4
-; RV32I-NEXT: mv a3, a0
-; RV32I-NEXT: bnez a4, .LBB14_3
-; RV32I-NEXT: j .LBB14_4
+; RV32I-NEXT: j .LBB14_3
; RV32I-NEXT: .LBB14_2:
; RV32I-NEXT: srl a3, a0, a2
; RV32I-NEXT: neg a6, a4
; RV32I-NEXT: sll a6, a1, a6
; RV32I-NEXT: or a6, a3, a6
-; RV32I-NEXT: mv a3, a0
-; RV32I-NEXT: beqz a4, .LBB14_4
; RV32I-NEXT: .LBB14_3:
+; RV32I-NEXT: mv a3, a0
+; RV32I-NEXT: beqz a4, .LBB14_5
+; RV32I-NEXT: # %bb.4:
; RV32I-NEXT: mv a3, a6
-; RV32I-NEXT: .LBB14_4:
-; RV32I-NEXT: bltu a4, a5, .LBB14_6
-; RV32I-NEXT: # %bb.5:
+; RV32I-NEXT: .LBB14_5:
+; RV32I-NEXT: bltu a4, a5, .LBB14_7
+; RV32I-NEXT: # %bb.6:
; RV32I-NEXT: li a4, 0
-; RV32I-NEXT: j .LBB14_7
-; RV32I-NEXT: .LBB14_6:
-; RV32I-NEXT: srl a4, a1, a2
+; RV32I-NEXT: j .LBB14_8
; RV32I-NEXT: .LBB14_7:
+; RV32I-NEXT: srl a4, a1, a2
+; RV32I-NEXT: .LBB14_8:
; RV32I-NEXT: neg a7, a2
; RV32I-NEXT: andi a6, a7, 63
-; RV32I-NEXT: bltu a6, a5, .LBB14_9
-; RV32I-NEXT: # %bb.8:
+; RV32I-NEXT: bltu a6, a5, .LBB14_10
+; RV32I-NEXT: # %bb.9:
; RV32I-NEXT: li a2, 0
; RV32I-NEXT: sll a0, a0, a6
-; RV32I-NEXT: bnez a6, .LBB14_10
-; RV32I-NEXT: j .LBB14_11
-; RV32I-NEXT: .LBB14_9:
+; RV32I-NEXT: bnez a6, .LBB14_11
+; RV32I-NEXT: j .LBB14_12
+; RV32I-NEXT: .LBB14_10:
; RV32I-NEXT: sll a2, a0, a7
; RV32I-NEXT: neg a5, a6
; RV32I-NEXT: srl a0, a0, a5
; RV32I-NEXT: sll a5, a1, a7
; RV32I-NEXT: or a0, a0, a5
-; RV32I-NEXT: beqz a6, .LBB14_11
-; RV32I-NEXT: .LBB14_10:
-; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: beqz a6, .LBB14_12
; RV32I-NEXT: .LBB14_11:
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: .LBB14_12:
; RV32I-NEXT: or a0, a3, a2
; RV32I-NEXT: or a1, a4, a1
; RV32I-NEXT: ret
@@ -1463,44 +1456,43 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV32ZBB-NEXT: bltu a4, a5, .LBB14_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: srl a6, a1, a4
-; RV32ZBB-NEXT: mv a3, a0
-; RV32ZBB-NEXT: bnez a4, .LBB14_3
-; RV32ZBB-NEXT: j .LBB14_4
+; RV32ZBB-NEXT: j .LBB14_3
; RV32ZBB-NEXT: .LBB14_2:
; RV32ZBB-NEXT: srl a3, a0, a2
; RV32ZBB-NEXT: neg a6, a4
; RV32ZBB-NEXT: sll a6, a1, a6
; RV32ZBB-NEXT: or a6, a3, a6
-; RV32ZBB-NEXT: mv a3, a0
-; RV32ZBB-NEXT: beqz a4, .LBB14_4
; RV32ZBB-NEXT: .LBB14_3:
+; RV32ZBB-NEXT: mv a3, a0
+; RV32ZBB-NEXT: beqz a4, .LBB14_5
+; RV32ZBB-NEXT: # %bb.4:
; RV32ZBB-NEXT: mv a3, a6
-; RV32ZBB-NEXT: .LBB14_4:
-; RV32ZBB-NEXT: bltu a4, a5, .LBB14_6
-; RV32ZBB-NEXT: # %bb.5:
+; RV32ZBB-NEXT: .LBB14_5:
+; RV32ZBB-NEXT: bltu a4, a5, .LBB14_7
+; RV32ZBB-NEXT: # %bb.6:
; RV32ZBB-NEXT: li a4, 0
-; RV32ZBB-NEXT: j .LBB14_7
-; RV32ZBB-NEXT: .LBB14_6:
-; RV32ZBB-NEXT: srl a4, a1, a2
+; RV32ZBB-NEXT: j .LBB14_8
; RV32ZBB-NEXT: .LBB14_7:
+; RV32ZBB-NEXT: srl a4, a1, a2
+; RV32ZBB-NEXT: .LBB14_8:
; RV32ZBB-NEXT: neg a7, a2
; RV32ZBB-NEXT: andi a6, a7, 63
-; RV32ZBB-NEXT: bltu a6, a5, .LBB14_9
-; RV32ZBB-NEXT: # %bb.8:
+; RV32ZBB-NEXT: bltu a6, a5, .LBB14_10
+; RV32ZBB-NEXT: # %bb.9:
; RV32ZBB-NEXT: li a2, 0
; RV32ZBB-NEXT: sll a0, a0, a6
-; RV32ZBB-NEXT: bnez a6, .LBB14_10
-; RV32ZBB-NEXT: j .LBB14_11
-; RV32ZBB-NEXT: .LBB14_9:
+; RV32ZBB-NEXT: bnez a6, .LBB14_11
+; RV32ZBB-NEXT: j .LBB14_12
+; RV32ZBB-NEXT: .LBB14_10:
; RV32ZBB-NEXT: sll a2, a0, a7
; RV32ZBB-NEXT: neg a5, a6
; RV32ZBB-NEXT: srl a0, a0, a5
; RV32ZBB-NEXT: sll a5, a1, a7
; RV32ZBB-NEXT: or a0, a0, a5
-; RV32ZBB-NEXT: beqz a6, .LBB14_11
-; RV32ZBB-NEXT: .LBB14_10:
-; RV32ZBB-NEXT: mv a1, a0
+; RV32ZBB-NEXT: beqz a6, .LBB14_12
; RV32ZBB-NEXT: .LBB14_11:
+; RV32ZBB-NEXT: mv a1, a0
+; RV32ZBB-NEXT: .LBB14_12:
; RV32ZBB-NEXT: or a0, a3, a2
; RV32ZBB-NEXT: or a1, a4, a1
; RV32ZBB-NEXT: ret
@@ -1520,44 +1512,43 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV32XTHEADBB-NEXT: bltu a4, a5, .LBB14_2
; RV32XTHEADBB-NEXT: # %bb.1:
; RV32XTHEADBB-NEXT: srl a6, a1, a4
-; RV32XTHEADBB-NEXT: mv a3, a0
-; RV32XTHEADBB-NEXT: bnez a4, .LBB14_3
-; RV32XTHEADBB-NEXT: j .LBB14_4
+; RV32XTHEADBB-NEXT: j .LBB14_3
; RV32XTHEADBB-NEXT: .LBB14_2:
; RV32XTHEADBB-NEXT: srl a3, a0, a2
; RV32XTHEADBB-NEXT: neg a6, a4
; RV32XTHEADBB-NEXT: sll a6, a1, a6
; RV32XTHEADBB-NEXT: or a6, a3, a6
-; RV32XTHEADBB-NEXT: mv a3, a0
-; RV32XTHEADBB-NEXT: beqz a4, .LBB14_4
; RV32XTHEADBB-NEXT: .LBB14_3:
+; RV32XTHEADBB-NEXT: mv a3, a0
+; RV32XTHEADBB-NEXT: beqz a4, .LBB14_5
+; RV32XTHEADBB-NEXT: # %bb.4:
; RV32XTHEADBB-NEXT: mv a3, a6
-; RV32XTHEADBB-NEXT: .LBB14_4:
-; RV32XTHEADBB-NEXT: bltu a4, a5, .LBB14_6
-; RV32XTHEADBB-NEXT: # %bb.5:
+; RV32XTHEADBB-NEXT: .LBB14_5:
+; RV32XTHEADBB-NEXT: bltu a4, a5, .LBB14_7
+; RV32XTHEADBB-NEXT: # %bb.6:
; RV32XTHEADBB-NEXT: li a4, 0
-; RV32XTHEADBB-NEXT: j .LBB14_7
-; RV32XTHEADBB-NEXT: .LBB14_6:
-; RV32XTHEADBB-NEXT: srl a4, a1, a2
+; RV32XTHEADBB-NEXT: j .LBB14_8
; RV32XTHEADBB-NEXT: .LBB14_7:
+; RV32XTHEADBB-NEXT: srl a4, a1, a2
+; RV32XTHEADBB-NEXT: .LBB14_8:
; RV32XTHEADBB-NEXT: neg a7, a2
; RV32XTHEADBB-NEXT: andi a6, a7, 63
-; RV32XTHEADBB-NEXT: bltu a6, a5, .LBB14_9
-; RV32XTHEADBB-NEXT: # %bb.8:
+; RV32XTHEADBB-NEXT: bltu a6, a5, .LBB14_10
+; RV32XTHEADBB-NEXT: # %bb.9:
; RV32XTHEADBB-NEXT: li a2, 0
; RV32XTHEADBB-NEXT: sll a0, a0, a6
-; RV32XTHEADBB-NEXT: bnez a6, .LBB14_10
-; RV32XTHEADBB-NEXT: j .LBB14_11
-; RV32XTHEADBB-NEXT: .LBB14_9:
+; RV32XTHEADBB-NEXT: bnez a6, .LBB14_11
+; RV32XTHEADBB-NEXT: j .LBB14_12
+; RV32XTHEADBB-NEXT: .LBB14_10:
; RV32XTHEADBB-NEXT: sll a2, a0, a7
; RV32XTHEADBB-NEXT: neg a5, a6
; RV32XTHEADBB-NEXT: srl a0, a0, a5
; RV32XTHEADBB-NEXT: sll a5, a1, a7
; RV32XTHEADBB-NEXT: or a0, a0, a5
-; RV32XTHEADBB-NEXT: beqz a6, .LBB14_11
-; RV32XTHEADBB-NEXT: .LBB14_10:
-; RV32XTHEADBB-NEXT: mv a1, a0
+; RV32XTHEADBB-NEXT: beqz a6, .LBB14_12
; RV32XTHEADBB-NEXT: .LBB14_11:
+; RV32XTHEADBB-NEXT: mv a1, a0
+; RV32XTHEADBB-NEXT: .LBB14_12:
; RV32XTHEADBB-NEXT: or a0, a3, a2
; RV32XTHEADBB-NEXT: or a1, a4, a1
; RV32XTHEADBB-NEXT: ret
@@ -2061,60 +2052,59 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign
; RV32I-NEXT: bltu a5, t0, .LBB19_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srl t1, a1, a5
-; RV32I-NEXT: mv a7, a0
-; RV32I-NEXT: bnez a5, .LBB19_3
-; RV32I-NEXT: j .LBB19_4
+; RV32I-NEXT: j .LBB19_3
; RV32I-NEXT: .LBB19_2:
; RV32I-NEXT: srl a7, a0, a4
; RV32I-NEXT: sll t1, a1, a6
; RV32I-NEXT: or t1, a7, t1
-; RV32I-NEXT: mv a7, a0
-; RV32I-NEXT: beqz a5, .LBB19_4
; RV32I-NEXT: .LBB19_3:
+; RV32I-NEXT: mv a7, a0
+; RV32I-NEXT: beqz a5, .LBB19_5
+; RV32I-NEXT: # %bb.4:
; RV32I-NEXT: mv a7, t1
-; RV32I-NEXT: .LBB19_4:
+; RV32I-NEXT: .LBB19_5:
; RV32I-NEXT: neg t4, a5
-; RV32I-NEXT: bltu a5, t0, .LBB19_7
-; RV32I-NEXT: # %bb.5:
+; RV32I-NEXT: bltu a5, t0, .LBB19_9
+; RV32I-NEXT: # %bb.6:
; RV32I-NEXT: li t1, 0
+; RV32I-NEXT: .LBB19_7:
; RV32I-NEXT: andi t3, t4, 63
-; RV32I-NEXT: bgeu t3, t0, .LBB19_8
-; RV32I-NEXT: .LBB19_6:
+; RV32I-NEXT: bgeu t3, t0, .LBB19_10
+; RV32I-NEXT: # %bb.8:
; RV32I-NEXT: sll t2, a0, t4
; RV32I-NEXT: neg t5, t3
; RV32I-NEXT: srl a0, a0, t5
; RV32I-NEXT: sll t4, a1, t4
; RV32I-NEXT: or a0, a0, t4
-; RV32I-NEXT: bnez t3, .LBB19_9
-; RV32I-NEXT: j .LBB19_10
-; RV32I-NEXT: .LBB19_7:
+; RV32I-NEXT: bnez t3, .LBB19_11
+; RV32I-NEXT: j .LBB19_12
+; RV32I-NEXT: .LBB19_9:
; RV32I-NEXT: srl t1, a1, a4
-; RV32I-NEXT: andi t3, t4, 63
-; RV32I-NEXT: bltu t3, t0, .LBB19_6
-; RV32I-NEXT: .LBB19_8:
+; RV32I-NEXT: j .LBB19_7
+; RV32I-NEXT: .LBB19_10:
; RV32I-NEXT: li t2, 0
; RV32I-NEXT: sll a0, a0, t3
-; RV32I-NEXT: beqz t3, .LBB19_10
-; RV32I-NEXT: .LBB19_9:
+; RV32I-NEXT: beqz t3, .LBB19_12
+; RV32I-NEXT: .LBB19_11:
; RV32I-NEXT: mv a1, a0
-; RV32I-NEXT: .LBB19_10:
-; RV32I-NEXT: bltu a5, t0, .LBB19_12
-; RV32I-NEXT: # %bb.11:
+; RV32I-NEXT: .LBB19_12:
+; RV32I-NEXT: bltu a5, t0, .LBB19_14
+; RV32I-NEXT: # %bb.13:
; RV32I-NEXT: li t0, 0
; RV32I-NEXT: sll a0, a2, a5
-; RV32I-NEXT: j .LBB19_13
-; RV32I-NEXT: .LBB19_12:
+; RV32I-NEXT: j .LBB19_15
+; RV32I-NEXT: .LBB19_14:
; RV32I-NEXT: sll t0, a2, a4
; RV32I-NEXT: srl a0, a2, a6
; RV32I-NEXT: sll a2, a3, a4
; RV32I-NEXT: or a0, a0, a2
-; RV32I-NEXT: .LBB19_13:
+; RV32I-NEXT: .LBB19_15:
; RV32I-NEXT: or a2, a7, t2
; RV32I-NEXT: or a1, t1, a1
-; RV32I-NEXT: beqz a5, .LBB19_15
-; RV32I-NEXT: # %bb.14:
+; RV32I-NEXT: beqz a5, .LBB19_17
+; RV32I-NEXT: # %bb.16:
; RV32I-NEXT: mv a3, a0
-; RV32I-NEXT: .LBB19_15:
+; RV32I-NEXT: .LBB19_17:
; RV32I-NEXT: add a0, a2, t0
; RV32I-NEXT: sltu a2, a0, t0
; RV32I-NEXT: add a1, a1, a3
@@ -2140,60 +2130,59 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign
; RV32ZBB-NEXT: bltu a5, t0, .LBB19_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: srl t1, a1, a5
-; RV32ZBB-NEXT: mv a7, a0
-; RV32ZBB-NEXT: bnez a5, .LBB19_3
-; RV32ZBB-NEXT: j .LBB19_4
+; RV32ZBB-NEXT: j .LBB19_3
; RV32ZBB-NEXT: .LBB19_2:
; RV32ZBB-NEXT: srl a7, a0, a4
; RV32ZBB-NEXT: sll t1, a1, a6
; RV32ZBB-NEXT: or t1, a7, t1
-; RV32ZBB-NEXT: mv a7, a0
-; RV32ZBB-NEXT: beqz a5, .LBB19_4
; RV32ZBB-NEXT: .LBB19_3:
+; RV32ZBB-NEXT: mv a7, a0
+; RV32ZBB-NEXT: beqz a5, .LBB19_5
+; RV32ZBB-NEXT: # %bb.4:
; RV32ZBB-NEXT: mv a7, t1
-; RV32ZBB-NEXT: .LBB19_4:
+; RV32ZBB-NEXT: .LBB19_5:
; RV32ZBB-NEXT: neg t4, a5
-; RV32ZBB-NEXT: bltu a5, t0, .LBB19_7
-; RV32ZBB-NEXT: # %bb.5:
+; RV32ZBB-NEXT: bltu a5, t0, .LBB19_9
+; RV32ZBB-NEXT: # %bb.6:
; RV32ZBB-NEXT: li t1, 0
+; RV32ZBB-NEXT: .LBB19_7:
; RV32ZBB-NEXT: andi t3, t4, 63
-; RV32ZBB-NEXT: bgeu t3, t0, .LBB19_8
-; RV32ZBB-NEXT: .LBB19_6:
+; RV32ZBB-NEXT: bgeu t3, t0, .LBB19_10
+; RV32ZBB-NEXT: # %bb.8:
; RV32ZBB-NEXT: sll t2, a0, t4
; RV32ZBB-NEXT: neg t5, t3
; RV32ZBB-NEXT: srl a0, a0, t5
; RV32ZBB-NEXT: sll t4, a1, t4
; RV32ZBB-NEXT: or a0, a0, t4
-; RV32ZBB-NEXT: bnez t3, .LBB19_9
-; RV32ZBB-NEXT: j .LBB19_10
-; RV32ZBB-NEXT: .LBB19_7:
+; RV32ZBB-NEXT: bnez t3, .LBB19_11
+; RV32ZBB-NEXT: j .LBB19_12
+; RV32ZBB-NEXT: .LBB19_9:
; RV32ZBB-NEXT: srl t1, a1, a4
-; RV32ZBB-NEXT: andi t3, t4, 63
-; RV32ZBB-NEXT: bltu t3, t0, .LBB19_6
-; RV32ZBB-NEXT: .LBB19_8:
+; RV32ZBB-NEXT: j .LBB19_7
+; RV32ZBB-NEXT: .LBB19_10:
; RV32ZBB-NEXT: li t2, 0
; RV32ZBB-NEXT: sll a0, a0, t3
-; RV32ZBB-NEXT: beqz t3, .LBB19_10
-; RV32ZBB-NEXT: .LBB19_9:
+; RV32ZBB-NEXT: beqz t3, .LBB19_12
+; RV32ZBB-NEXT: .LBB19_11:
; RV32ZBB-NEXT: mv a1, a0
-; RV32ZBB-NEXT: .LBB19_10:
-; RV32ZBB-NEXT: bltu a5, t0, .LBB19_12
-; RV32ZBB-NEXT: # %bb.11:
+; RV32ZBB-NEXT: .LBB19_12:
+; RV32ZBB-NEXT: bltu a5, t0, .LBB19_14
+; RV32ZBB-NEXT: # %bb.13:
; RV32ZBB-NEXT: li t0, 0
; RV32ZBB-NEXT: sll a0, a2, a5
-; RV32ZBB-NEXT: j .LBB19_13
-; RV32ZBB-NEXT: .LBB19_12:
+; RV32ZBB-NEXT: j .LBB19_15
+; RV32ZBB-NEXT: .LBB19_14:
; RV32ZBB-NEXT: sll t0, a2, a4
; RV32ZBB-NEXT: srl a0, a2, a6
; RV32ZBB-NEXT: sll a2, a3, a4
; RV32ZBB-NEXT: or a0, a0, a2
-; RV32ZBB-NEXT: .LBB19_13:
+; RV32ZBB-NEXT: .LBB19_15:
; RV32ZBB-NEXT: or a2, a7, t2
; RV32ZBB-NEXT: or a1, t1, a1
-; RV32ZBB-NEXT: beqz a5, .LBB19_15
-; RV32ZBB-NEXT: # %bb.14:
+; RV32ZBB-NEXT: beqz a5, .LBB19_17
+; RV32ZBB-NEXT: # %bb.16:
; RV32ZBB-NEXT: mv a3, a0
-; RV32ZBB-NEXT: .LBB19_15:
+; RV32ZBB-NEXT: .LBB19_17:
; RV32ZBB-NEXT: add a0, a2, t0
; RV32ZBB-NEXT: sltu a2, a0, t0
; RV32ZBB-NEXT: add a1, a1, a3
@@ -2215,60 +2204,59 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign
; RV32XTHEADBB-NEXT: bltu a5, t0, .LBB19_2
; RV32XTHEADBB-NEXT: # %bb.1:
; RV32XTHEADBB-NEXT: srl t1, a1, a5
-; RV32XTHEADBB-NEXT: mv a7, a0
-; RV32XTHEADBB-NEXT: bnez a5, .LBB19_3
-; RV32XTHEADBB-NEXT: j .LBB19_4
+; RV32XTHEADBB-NEXT: j .LBB19_3
; RV32XTHEADBB-NEXT: .LBB19_2:
; RV32XTHEADBB-NEXT: srl a7, a0, a4
; RV32XTHEADBB-NEXT: sll t1, a1, a6
; RV32XTHEADBB-NEXT: or t1, a7, t1
-; RV32XTHEADBB-NEXT: mv a7, a0
-; RV32XTHEADBB-NEXT: beqz a5, .LBB19_4
; RV32XTHEADBB-NEXT: .LBB19_3:
+; RV32XTHEADBB-NEXT: mv a7, a0
+; RV32XTHEADBB-NEXT: beqz a5, .LBB19_5
+; RV32XTHEADBB-NEXT: # %bb.4:
; RV32XTHEADBB-NEXT: mv a7, t1
-; RV32XTHEADBB-NEXT: .LBB19_4:
+; RV32XTHEADBB-NEXT: .LBB19_5:
; RV32XTHEADBB-NEXT: neg t4, a5
-; RV32XTHEADBB-NEXT: bltu a5, t0, .LBB19_7
-; RV32XTHEADBB-NEXT: # %bb.5:
+; RV32XTHEADBB-NEXT: bltu a5, t0, .LBB19_9
+; RV32XTHEADBB-NEXT: # %bb.6:
; RV32XTHEADBB-NEXT: li t1, 0
+; RV32XTHEADBB-NEXT: .LBB19_7:
; RV32XTHEADBB-NEXT: andi t3, t4, 63
-; RV32XTHEADBB-NEXT: bgeu t3, t0, .LBB19_8
-; RV32XTHEADBB-NEXT: .LBB19_6:
+; RV32XTHEADBB-NEXT: bgeu t3, t0, .LBB19_10
+; RV32XTHEADBB-NEXT: # %bb.8:
; RV32XTHEADBB-NEXT: sll t2, a0, t4
; RV32XTHEADBB-NEXT: neg t5, t3
; RV32XTHEADBB-NEXT: srl a0, a0, t5
; RV32XTHEADBB-NEXT: sll t4, a1, t4
; RV32XTHEADBB-NEXT: or a0, a0, t4
-; RV32XTHEADBB-NEXT: bnez t3, .LBB19_9
-; RV32XTHEADBB-NEXT: j .LBB19_10
-; RV32XTHEADBB-NEXT: .LBB19_7:
+; RV32XTHEADBB-NEXT: bnez t3, .LBB19_11
+; RV32XTHEADBB-NEXT: j .LBB19_12
+; RV32XTHEADBB-NEXT: .LBB19_9:
; RV32XTHEADBB-NEXT: srl t1, a1, a4
-; RV32XTHEADBB-NEXT: andi t3, t4, 63
-; RV32XTHEADBB-NEXT: bltu t3, t0, .LBB19_6
-; RV32XTHEADBB-NEXT: .LBB19_8:
+; RV32XTHEADBB-NEXT: j .LBB19_7
+; RV32XTHEADBB-NEXT: .LBB19_10:
; RV32XTHEADBB-NEXT: li t2, 0
; RV32XTHEADBB-NEXT: sll a0, a0, t3
-; RV32XTHEADBB-NEXT: beqz t3, .LBB19_10
-; RV32XTHEADBB-NEXT: .LBB19_9:
+; RV32XTHEADBB-NEXT: beqz t3, .LBB19_12
+; RV32XTHEADBB-NEXT: .LBB19_11:
; RV32XTHEADBB-NEXT: mv a1, a0
-; RV32XTHEADBB-NEXT: .LBB19_10:
-; RV32XTHEADBB-NEXT: bltu a5, t0, .LBB19_12
-; RV32XTHEADBB-NEXT: # %bb.11:
+; RV32XTHEADBB-NEXT: .LBB19_12:
+; RV32XTHEADBB-NEXT: bltu a5, t0, .LBB19_14
+; RV32XTHEADBB-NEXT: # %bb.13:
; RV32XTHEADBB-NEXT: li t0, 0
; RV32XTHEADBB-NEXT: sll a0, a2, a5
-; RV32XTHEADBB-NEXT: j .LBB19_13
-; RV32XTHEADBB-NEXT: .LBB19_12:
+; RV32XTHEADBB-NEXT: j .LBB19_15
+; RV32XTHEADBB-NEXT: .LBB19_14:
; RV32XTHEADBB-NEXT: sll t0, a2, a4
; RV32XTHEADBB-NEXT: srl a0, a2, a6
; RV32XTHEADBB-NEXT: sll a2, a3, a4
; RV32XTHEADBB-NEXT: or a0, a0, a2
-; RV32XTHEADBB-NEXT: .LBB19_13:
+; RV32XTHEADBB-NEXT: .LBB19_15:
; RV32XTHEADBB-NEXT: or a2, a7, t2
; RV32XTHEADBB-NEXT: or a1, t1, a1
-; RV32XTHEADBB-NEXT: beqz a5, .LBB19_15
-; RV32XTHEADBB-NEXT: # %bb.14:
+; RV32XTHEADBB-NEXT: beqz a5, .LBB19_17
+; RV32XTHEADBB-NEXT: # %bb.16:
; RV32XTHEADBB-NEXT: mv a3, a0
-; RV32XTHEADBB-NEXT: .LBB19_15:
+; RV32XTHEADBB-NEXT: .LBB19_17:
; RV32XTHEADBB-NEXT: add a0, a2, t0
; RV32XTHEADBB-NEXT: sltu a2, a0, t0
; RV32XTHEADBB-NEXT: add a1, a1, a3
@@ -2415,42 +2403,41 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV32I-NEXT: srl t3, a2, t3
; RV32I-NEXT: sll a4, a3, a4
; RV32I-NEXT: or t3, t3, a4
-; RV32I-NEXT: mv a4, a3
-; RV32I-NEXT: bnez t1, .LBB21_14
-; RV32I-NEXT: j .LBB21_15
+; RV32I-NEXT: j .LBB21_14
; RV32I-NEXT: .LBB21_12:
; RV32I-NEXT: srl t5, a1, a7
; RV32I-NEXT: bltu t1, a5, .LBB21_11
; RV32I-NEXT: .LBB21_13:
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: sll t3, a2, t1
-; RV32I-NEXT: mv a4, a3
-; RV32I-NEXT: beqz t1, .LBB21_15
; RV32I-NEXT: .LBB21_14:
+; RV32I-NEXT: mv a4, a3
+; RV32I-NEXT: beqz t1, .LBB21_16
+; RV32I-NEXT: # %bb.15:
; RV32I-NEXT: mv a4, t3
-; RV32I-NEXT: .LBB21_15:
-; RV32I-NEXT: bltu t2, a5, .LBB21_17
-; RV32I-NEXT: # %bb.16:
+; RV32I-NEXT: .LBB21_16:
+; RV32I-NEXT: bltu t2, a5, .LBB21_18
+; RV32I-NEXT: # %bb.17:
; RV32I-NEXT: srl t1, a3, t2
-; RV32I-NEXT: bnez t2, .LBB21_18
-; RV32I-NEXT: j .LBB21_19
-; RV32I-NEXT: .LBB21_17:
+; RV32I-NEXT: bnez t2, .LBB21_19
+; RV32I-NEXT: j .LBB21_20
+; RV32I-NEXT: .LBB21_18:
; RV32I-NEXT: srl t1, a2, a7
; RV32I-NEXT: sll t3, a3, t4
; RV32I-NEXT: or t1, t1, t3
-; RV32I-NEXT: beqz t2, .LBB21_19
-; RV32I-NEXT: .LBB21_18:
-; RV32I-NEXT: mv a2, t1
+; RV32I-NEXT: beqz t2, .LBB21_20
; RV32I-NEXT: .LBB21_19:
+; RV32I-NEXT: mv a2, t1
+; RV32I-NEXT: .LBB21_20:
; RV32I-NEXT: or a0, a6, a0
; RV32I-NEXT: or a6, t0, t5
-; RV32I-NEXT: bltu t2, a5, .LBB21_21
-; RV32I-NEXT: # %bb.20:
+; RV32I-NEXT: bltu t2, a5, .LBB21_22
+; RV32I-NEXT: # %bb.21:
; RV32I-NEXT: li a3, 0
-; RV32I-NEXT: j .LBB21_22
-; RV32I-NEXT: .LBB21_21:
-; RV32I-NEXT: srl a3, a3, a7
+; RV32I-NEXT: j .LBB21_23
; RV32I-NEXT: .LBB21_22:
+; RV32I-NEXT: srl a3, a3, a7
+; RV32I-NEXT: .LBB21_23:
; RV32I-NEXT: or a1, a1, a2
; RV32I-NEXT: or a3, a4, a3
; RV32I-NEXT: add a0, a0, a1
@@ -2518,42 +2505,41 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV32ZBB-NEXT: srl t3, a2, t3
; RV32ZBB-NEXT: sll a4, a3, a4
; RV32ZBB-NEXT: or t3, t3, a4
-; RV32ZBB-NEXT: mv a4, a3
-; RV32ZBB-NEXT: bnez t1, .LBB21_14
-; RV32ZBB-NEXT: j .LBB21_15
+; RV32ZBB-NEXT: j .LBB21_14
; RV32ZBB-NEXT: .LBB21_12:
; RV32ZBB-NEXT: srl t5, a1, a7
; RV32ZBB-NEXT: bltu t1, a5, .LBB21_11
; RV32ZBB-NEXT: .LBB21_13:
; RV32ZBB-NEXT: li a1, 0
; RV32ZBB-NEXT: sll t3, a2, t1
-; RV32ZBB-NEXT: mv a4, a3
-; RV32ZBB-NEXT: beqz t1, .LBB21_15
; RV32ZBB-NEXT: .LBB21_14:
+; RV32ZBB-NEXT: mv a4, a3
+; RV32ZBB-NEXT: beqz t1, .LBB21_16
+; RV32ZBB-NEXT: # %bb.15:
; RV32ZBB-NEXT: mv a4, t3
-; RV32ZBB-NEXT: .LBB21_15:
-; RV32ZBB-NEXT: bltu t2, a5, .LBB21_17
-; RV32ZBB-NEXT: # %bb.16:
+; RV32ZBB-NEXT: .LBB21_16:
+; RV32ZBB-NEXT: bltu t2, a5, .LBB21_18
+; RV32ZBB-NEXT: # %bb.17:
; RV32ZBB-NEXT: srl t1, a3, t2
-; RV32ZBB-NEXT: bnez t2, .LBB21_18
-; RV32ZBB-NEXT: j .LBB21_19
-; RV32ZBB-NEXT: .LBB21_17:
+; RV32ZBB-NEXT: bnez t2, .LBB21_19
+; RV32ZBB-NEXT: j .LBB21_20
+; RV32ZBB-NEXT: .LBB21_18:
; RV32ZBB-NEXT: srl t1, a2, a7
; RV32ZBB-NEXT: sll t3, a3, t4
; RV32ZBB-NEXT: or t1, t1, t3
-; RV32ZBB-NEXT: beqz t2, .LBB21_19
-; RV32ZBB-NEXT: .LBB21_18:
-; RV32ZBB-NEXT: mv a2, t1
+; RV32ZBB-NEXT: beqz t2, .LBB21_20
; RV32ZBB-NEXT: .LBB21_19:
+; RV32ZBB-NEXT: mv a2, t1
+; RV32ZBB-NEXT: .LBB21_20:
; RV32ZBB-NEXT: or a0, a6, a0
; RV32ZBB-NEXT: or a6, t0, t5
-; RV32ZBB-NEXT: bltu t2, a5, .LBB21_21
-; RV32ZBB-NEXT: # %bb.20:
+; RV32ZBB-NEXT: bltu t2, a5, .LBB21_22
+; RV32ZBB-NEXT: # %bb.21:
; RV32ZBB-NEXT: li a3, 0
-; RV32ZBB-NEXT: j .LBB21_22
-; RV32ZBB-NEXT: .LBB21_21:
-; RV32ZBB-NEXT: srl a3, a3, a7
+; RV32ZBB-NEXT: j .LBB21_23
; RV32ZBB-NEXT: .LBB21_22:
+; RV32ZBB-NEXT: srl a3, a3, a7
+; RV32ZBB-NEXT: .LBB21_23:
; RV32ZBB-NEXT: or a1, a1, a2
; RV32ZBB-NEXT: or a3, a4, a3
; RV32ZBB-NEXT: add a0, a0, a1
@@ -2615,42 +2601,41 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV32XTHEADBB-NEXT: srl t3, a2, t3
; RV32XTHEADBB-NEXT: sll a4, a3, a4
; RV32XTHEADBB-NEXT: or t3, t3, a4
-; RV32XTHEADBB-NEXT: mv a4, a3
-; RV32XTHEADBB-NEXT: bnez t1, .LBB21_14
-; RV32XTHEADBB-NEXT: j .LBB21_15
+; RV32XTHEADBB-NEXT: j .LBB21_14
; RV32XTHEADBB-NEXT: .LBB21_12:
; RV32XTHEADBB-NEXT: srl t5, a1, a7
; RV32XTHEADBB-NEXT: bltu t1, a5, .LBB21_11
; RV32XTHEADBB-NEXT: .LBB21_13:
; RV32XTHEADBB-NEXT: li a1, 0
; RV32XTHEADBB-NEXT: sll t3, a2, t1
-; RV32XTHEADBB-NEXT: mv a4, a3
-; RV32XTHEADBB-NEXT: beqz t1, .LBB21_15
; RV32XTHEADBB-NEXT: .LBB21_14:
+; RV32XTHEADBB-NEXT: mv a4, a3
+; RV32XTHEADBB-NEXT: beqz t1, .LBB21_16
+; RV32XTHEADBB-NEXT: # %bb.15:
; RV32XTHEADBB-NEXT: mv a4, t3
-; RV32XTHEADBB-NEXT: .LBB21_15:
-; RV32XTHEADBB-NEXT: bltu t2, a5, .LBB21_17
-; RV32XTHEADBB-NEXT: # %bb.16:
+; RV32XTHEADBB-NEXT: .LBB21_16:
+; RV32XTHEADBB-NEXT: bltu t2, a5, .LBB21_18
+; RV32XTHEADBB-NEXT: # %bb.17:
; RV32XTHEADBB-NEXT: srl t1, a3, t2
-; RV32XTHEADBB-NEXT: bnez t2, .LBB21_18
-; RV32XTHEADBB-NEXT: j .LBB21_19
-; RV32XTHEADBB-NEXT: .LBB21_17:
+; RV32XTHEADBB-NEXT: bnez t2, .LBB21_19
+; RV32XTHEADBB-NEXT: j .LBB21_20
+; RV32XTHEADBB-NEXT: .LBB21_18:
; RV32XTHEADBB-NEXT: srl t1, a2, a7
; RV32XTHEADBB-NEXT: sll t3, a3, t4
; RV32XTHEADBB-NEXT: or t1, t1, t3
-; RV32XTHEADBB-NEXT: beqz t2, .LBB21_19
-; RV32XTHEADBB-NEXT: .LBB21_18:
-; RV32XTHEADBB-NEXT: mv a2, t1
+; RV32XTHEADBB-NEXT: beqz t2, .LBB21_20
; RV32XTHEADBB-NEXT: .LBB21_19:
+; RV32XTHEADBB-NEXT: mv a2, t1
+; RV32XTHEADBB-NEXT: .LBB21_20:
; RV32XTHEADBB-NEXT: or a0, a6, a0
; RV32XTHEADBB-NEXT: or a6, t0, t5
-; RV32XTHEADBB-NEXT: bltu t2, a5, .LBB21_21
-; RV32XTHEADBB-NEXT: # %bb.20:
+; RV32XTHEADBB-NEXT: bltu t2, a5, .LBB21_22
+; RV32XTHEADBB-NEXT: # %bb.21:
; RV32XTHEADBB-NEXT: li a3, 0
-; RV32XTHEADBB-NEXT: j .LBB21_22
-; RV32XTHEADBB-NEXT: .LBB21_21:
-; RV32XTHEADBB-NEXT: srl a3, a3, a7
+; RV32XTHEADBB-NEXT: j .LBB21_23
; RV32XTHEADBB-NEXT: .LBB21_22:
+; RV32XTHEADBB-NEXT: srl a3, a3, a7
+; RV32XTHEADBB-NEXT: .LBB21_23:
; RV32XTHEADBB-NEXT: or a1, a1, a2
; RV32XTHEADBB-NEXT: or a3, a4, a3
; RV32XTHEADBB-NEXT: add a0, a0, a1
@@ -2762,81 +2747,79 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV32I-NEXT: bltu t0, a6, .LBB23_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srl a7, a1, t0
-; RV32I-NEXT: mv a5, a0
-; RV32I-NEXT: bnez t0, .LBB23_3
-; RV32I-NEXT: j .LBB23_4
+; RV32I-NEXT: j .LBB23_3
; RV32I-NEXT: .LBB23_2:
; RV32I-NEXT: srl a5, a0, a4
; RV32I-NEXT: sll a7, a1, t4
; RV32I-NEXT: or a7, a5, a7
-; RV32I-NEXT: mv a5, a0
-; RV32I-NEXT: beqz t0, .LBB23_4
; RV32I-NEXT: .LBB23_3:
+; RV32I-NEXT: mv a5, a0
+; RV32I-NEXT: beqz t0, .LBB23_5
+; RV32I-NEXT: # %bb.4:
; RV32I-NEXT: mv a5, a7
-; RV32I-NEXT: .LBB23_4:
+; RV32I-NEXT: .LBB23_5:
; RV32I-NEXT: neg t2, t0
-; RV32I-NEXT: bltu t0, a6, .LBB23_6
-; RV32I-NEXT: # %bb.5:
+; RV32I-NEXT: bltu t0, a6, .LBB23_7
+; RV32I-NEXT: # %bb.6:
; RV32I-NEXT: li a7, 0
-; RV32I-NEXT: j .LBB23_7
-; RV32I-NEXT: .LBB23_6:
-; RV32I-NEXT: srl a7, a1, a4
+; RV32I-NEXT: j .LBB23_8
; RV32I-NEXT: .LBB23_7:
+; RV32I-NEXT: srl a7, a1, a4
+; RV32I-NEXT: .LBB23_8:
; RV32I-NEXT: andi t1, t2, 63
; RV32I-NEXT: neg t5, t1
-; RV32I-NEXT: bltu t1, a6, .LBB23_9
-; RV32I-NEXT: # %bb.8:
+; RV32I-NEXT: bltu t1, a6, .LBB23_10
+; RV32I-NEXT: # %bb.9:
; RV32I-NEXT: li t3, 0
; RV32I-NEXT: sll a0, a0, t1
-; RV32I-NEXT: bnez t1, .LBB23_10
-; RV32I-NEXT: j .LBB23_11
-; RV32I-NEXT: .LBB23_9:
+; RV32I-NEXT: bnez t1, .LBB23_11
+; RV32I-NEXT: j .LBB23_12
+; RV32I-NEXT: .LBB23_10:
; RV32I-NEXT: sll t3, a0, t2
; RV32I-NEXT: srl a0, a0, t5
; RV32I-NEXT: sll t6, a1, t2
; RV32I-NEXT: or a0, a0, t6
-; RV32I-NEXT: beqz t1, .LBB23_11
-; RV32I-NEXT: .LBB23_10:
-; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: beqz t1, .LBB23_12
; RV32I-NEXT: .LBB23_11:
-; RV32I-NEXT: bltu t0, a6, .LBB23_13
-; RV32I-NEXT: # %bb.12:
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: .LBB23_12:
+; RV32I-NEXT: bltu t0, a6, .LBB23_14
+; RV32I-NEXT: # %bb.13:
; RV32I-NEXT: srl t4, a3, t0
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: bnez t0, .LBB23_14
; RV32I-NEXT: j .LBB23_15
-; RV32I-NEXT: .LBB23_13:
+; RV32I-NEXT: .LBB23_14:
; RV32I-NEXT: srl a0, a2, a4
; RV32I-NEXT: sll t4, a3, t4
; RV32I-NEXT: or t4, a0, t4
-; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: beqz t0, .LBB23_15
-; RV32I-NEXT: .LBB23_14:
-; RV32I-NEXT: mv a0, t4
; RV32I-NEXT: .LBB23_15:
-; RV32I-NEXT: bltu t0, a6, .LBB23_18
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: beqz t0, .LBB23_17
; RV32I-NEXT: # %bb.16:
-; RV32I-NEXT: li a4, 0
-; RV32I-NEXT: bgeu t1, a6, .LBB23_19
+; RV32I-NEXT: mv a0, t4
; RV32I-NEXT: .LBB23_17:
+; RV32I-NEXT: bltu t0, a6, .LBB23_20
+; RV32I-NEXT: # %bb.18:
+; RV32I-NEXT: li a4, 0
+; RV32I-NEXT: bgeu t1, a6, .LBB23_21
+; RV32I-NEXT: .LBB23_19:
; RV32I-NEXT: sll a6, a2, t2
; RV32I-NEXT: srl a2, a2, t5
; RV32I-NEXT: sll t0, a3, t2
; RV32I-NEXT: or a2, a2, t0
-; RV32I-NEXT: j .LBB23_20
-; RV32I-NEXT: .LBB23_18:
+; RV32I-NEXT: j .LBB23_22
+; RV32I-NEXT: .LBB23_20:
; RV32I-NEXT: srl a4, a3, a4
-; RV32I-NEXT: bltu t1, a6, .LBB23_17
-; RV32I-NEXT: .LBB23_19:
+; RV32I-NEXT: bltu t1, a6, .LBB23_19
+; RV32I-NEXT: .LBB23_21:
; RV32I-NEXT: li a6, 0
; RV32I-NEXT: sll a2, a2, t1
-; RV32I-NEXT: .LBB23_20:
+; RV32I-NEXT: .LBB23_22:
; RV32I-NEXT: or a5, a5, t3
; RV32I-NEXT: or a1, a7, a1
-; RV32I-NEXT: beqz t1, .LBB23_22
-; RV32I-NEXT: # %bb.21:
+; RV32I-NEXT: beqz t1, .LBB23_24
+; RV32I-NEXT: # %bb.23:
; RV32I-NEXT: mv a3, a2
-; RV32I-NEXT: .LBB23_22:
+; RV32I-NEXT: .LBB23_24:
; RV32I-NEXT: or a2, a0, a6
; RV32I-NEXT: or a3, a4, a3
; RV32I-NEXT: add a0, a5, a2
@@ -2866,81 +2849,79 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV32ZBB-NEXT: bltu t0, a6, .LBB23_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: srl a7, a1, t0
-; RV32ZBB-NEXT: mv a5, a0
-; RV32ZBB-NEXT: bnez t0, .LBB23_3
-; RV32ZBB-NEXT: j .LBB23_4
+; RV32ZBB-NEXT: j .LBB23_3
; RV32ZBB-NEXT: .LBB23_2:
; RV32ZBB-NEXT: srl a5, a0, a4
; RV32ZBB-NEXT: sll a7, a1, t4
; RV32ZBB-NEXT: or a7, a5, a7
-; RV32ZBB-NEXT: mv a5, a0
-; RV32ZBB-NEXT: beqz t0, .LBB23_4
; RV32ZBB-NEXT: .LBB23_3:
+; RV32ZBB-NEXT: mv a5, a0
+; RV32ZBB-NEXT: beqz t0, .LBB23_5
+; RV32ZBB-NEXT: # %bb.4:
; RV32ZBB-NEXT: mv a5, a7
-; RV32ZBB-NEXT: .LBB23_4:
+; RV32ZBB-NEXT: .LBB23_5:
; RV32ZBB-NEXT: neg t2, t0
-; RV32ZBB-NEXT: bltu t0, a6, .LBB23_6
-; RV32ZBB-NEXT: # %bb.5:
+; RV32ZBB-NEXT: bltu t0, a6, .LBB23_7
+; RV32ZBB-NEXT: # %bb.6:
; RV32ZBB-NEXT: li a7, 0
-; RV32ZBB-NEXT: j .LBB23_7
-; RV32ZBB-NEXT: .LBB23_6:
-; RV32ZBB-NEXT: srl a7, a1, a4
+; RV32ZBB-NEXT: j .LBB23_8
; RV32ZBB-NEXT: .LBB23_7:
+; RV32ZBB-NEXT: srl a7, a1, a4
+; RV32ZBB-NEXT: .LBB23_8:
; RV32ZBB-NEXT: andi t1, t2, 63
; RV32ZBB-NEXT: neg t5, t1
-; RV32ZBB-NEXT: bltu t1, a6, .LBB23_9
-; RV32ZBB-NEXT: # %bb.8:
+; RV32ZBB-NEXT: bltu t1, a6, .LBB23_10
+; RV32ZBB-NEXT: # %bb.9:
; RV32ZBB-NEXT: li t3, 0
; RV32ZBB-NEXT: sll a0, a0, t1
-; RV32ZBB-NEXT: bnez t1, .LBB23_10
-; RV32ZBB-NEXT: j .LBB23_11
-; RV32ZBB-NEXT: .LBB23_9:
+; RV32ZBB-NEXT: bnez t1, .LBB23_11
+; RV32ZBB-NEXT: j .LBB23_12
+; RV32ZBB-NEXT: .LBB23_10:
; RV32ZBB-NEXT: sll t3, a0, t2
; RV32ZBB-NEXT: srl a0, a0, t5
; RV32ZBB-NEXT: sll t6, a1, t2
; RV32ZBB-NEXT: or a0, a0, t6
-; RV32ZBB-NEXT: beqz t1, .LBB23_11
-; RV32ZBB-NEXT: .LBB23_10:
-; RV32ZBB-NEXT: mv a1, a0
+; RV32ZBB-NEXT: beqz t1, .LBB23_12
; RV32ZBB-NEXT: .LBB23_11:
-; RV32ZBB-NEXT: bltu t0, a6, .LBB23_13
-; RV32ZBB-NEXT: # %bb.12:
+; RV32ZBB-NEXT: mv a1, a0
+; RV32ZBB-NEXT: .LBB23_12:
+; RV32ZBB-NEXT: bltu t0, a6, .LBB23_14
+; RV32ZBB-NEXT: # %bb.13:
; RV32ZBB-NEXT: srl t4, a3, t0
-; RV32ZBB-NEXT: mv a0, a2
-; RV32ZBB-NEXT: bnez t0, .LBB23_14
; RV32ZBB-NEXT: j .LBB23_15
-; RV32ZBB-NEXT: .LBB23_13:
+; RV32ZBB-NEXT: .LBB23_14:
; RV32ZBB-NEXT: srl a0, a2, a4
; RV32ZBB-NEXT: sll t4, a3, t4
; RV32ZBB-NEXT: or t4, a0, t4
-; RV32ZBB-NEXT: mv a0, a2
-; RV32ZBB-NEXT: beqz t0, .LBB23_15
-; RV32ZBB-NEXT: .LBB23_14:
-; RV32ZBB-NEXT: mv a0, t4
; RV32ZBB-NEXT: .LBB23_15:
-; RV32ZBB-NEXT: bltu t0, a6, .LBB23_18
+; RV32ZBB-NEXT: mv a0, a2
+; RV32ZBB-NEXT: beqz t0, .LBB23_17
; RV32ZBB-NEXT: # %bb.16:
-; RV32ZBB-NEXT: li a4, 0
-; RV32ZBB-NEXT: bgeu t1, a6, .LBB23_19
+; RV32ZBB-NEXT: mv a0, t4
; RV32ZBB-NEXT: .LBB23_17:
+; RV32ZBB-NEXT: bltu t0, a6, .LBB23_20
+; RV32ZBB-NEXT: # %bb.18:
+; RV32ZBB-NEXT: li a4, 0
+; RV32ZBB-NEXT: bgeu t1, a6, .LBB23_21
+; RV32ZBB-NEXT: .LBB23_19:
; RV32ZBB-NEXT: sll a6, a2, t2
; RV32ZBB-NEXT: srl a2, a2, t5
; RV32ZBB-NEXT: sll t0, a3, t2
; RV32ZBB-NEXT: or a2, a2, t0
-; RV32ZBB-NEXT: j .LBB23_20
-; RV32ZBB-NEXT: .LBB23_18:
+; RV32ZBB-NEXT: j .LBB23_22
+; RV32ZBB-NEXT: .LBB23_20:
; RV32ZBB-NEXT: srl a4, a3, a4
-; RV32ZBB-NEXT: bltu t1, a6, .LBB23_17
-; RV32ZBB-NEXT: .LBB23_19:
+; RV32ZBB-NEXT: bltu t1, a6, .LBB23_19
+; RV32ZBB-NEXT: .LBB23_21:
; RV32ZBB-NEXT: li a6, 0
; RV32ZBB-NEXT: sll a2, a2, t1
-; RV32ZBB-NEXT: .LBB23_20:
+; RV32ZBB-NEXT: .LBB23_22:
; RV32ZBB-NEXT: or a5, a5, t3
; RV32ZBB-NEXT: or a1, a7, a1
-; RV32ZBB-NEXT: beqz t1, .LBB23_22
-; RV32ZBB-NEXT: # %bb.21:
+; RV32ZBB-NEXT: beqz t1, .LBB23_24
+; RV32ZBB-NEXT: # %bb.23:
; RV32ZBB-NEXT: mv a3, a2
-; RV32ZBB-NEXT: .LBB23_22:
+; RV32ZBB-NEXT: .LBB23_24:
; RV32ZBB-NEXT: or a2, a0, a6
; RV32ZBB-NEXT: or a3, a4, a3
; RV32ZBB-NEXT: add a0, a5, a2
@@ -2964,81 +2945,79 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV32XTHEADBB-NEXT: bltu t0, a6, .LBB23_2
; RV32XTHEADBB-NEXT: # %bb.1:
; RV32XTHEADBB-NEXT: srl a7, a1, t0
-; RV32XTHEADBB-NEXT: mv a5, a0
-; RV32XTHEADBB-NEXT: bnez t0, .LBB23_3
-; RV32XTHEADBB-NEXT: j .LBB23_4
+; RV32XTHEADBB-NEXT: j .LBB23_3
; RV32XTHEADBB-NEXT: .LBB23_2:
; RV32XTHEADBB-NEXT: srl a5, a0, a4
; RV32XTHEADBB-NEXT: sll a7, a1, t4
; RV32XTHEADBB-NEXT: or a7, a5, a7
-; RV32XTHEADBB-NEXT: mv a5, a0
-; RV32XTHEADBB-NEXT: beqz t0, .LBB23_4
; RV32XTHEADBB-NEXT: .LBB23_3:
+; RV32XTHEADBB-NEXT: mv a5, a0
+; RV32XTHEADBB-NEXT: beqz t0, .LBB23_5
+; RV32XTHEADBB-NEXT: # %bb.4:
; RV32XTHEADBB-NEXT: mv a5, a7
-; RV32XTHEADBB-NEXT: .LBB23_4:
+; RV32XTHEADBB-NEXT: .LBB23_5:
; RV32XTHEADBB-NEXT: neg t2, t0
-; RV32XTHEADBB-NEXT: bltu t0, a6, .LBB23_6
-; RV32XTHEADBB-NEXT: # %bb.5:
+; RV32XTHEADBB-NEXT: bltu t0, a6, .LBB23_7
+; RV32XTHEADBB-NEXT: # %bb.6:
; RV32XTHEADBB-NEXT: li a7, 0
-; RV32XTHEADBB-NEXT: j .LBB23_7
-; RV32XTHEADBB-NEXT: .LBB23_6:
-; RV32XTHEADBB-NEXT: srl a7, a1, a4
+; RV32XTHEADBB-NEXT: j .LBB23_8
; RV32XTHEADBB-NEXT: .LBB23_7:
+; RV32XTHEADBB-NEXT: srl a7, a1, a4
+; RV32XTHEADBB-NEXT: .LBB23_8:
; RV32XTHEADBB-NEXT: andi t1, t2, 63
; RV32XTHEADBB-NEXT: neg t5, t1
-; RV32XTHEADBB-NEXT: bltu t1, a6, .LBB23_9
-; RV32XTHEADBB-NEXT: # %bb.8:
+; RV32XTHEADBB-NEXT: bltu t1, a6, .LBB23_10
+; RV32XTHEADBB-NEXT: # %bb.9:
; RV32XTHEADBB-NEXT: li t3, 0
; RV32XTHEADBB-NEXT: sll a0, a0, t1
-; RV32XTHEADBB-NEXT: bnez t1, .LBB23_10
-; RV32XTHEADBB-NEXT: j .LBB23_11
-; RV32XTHEADBB-NEXT: .LBB23_9:
+; RV32XTHEADBB-NEXT: bnez t1, .LBB23_11
+; RV32XTHEADBB-NEXT: j .LBB23_12
+; RV32XTHEADBB-NEXT: .LBB23_10:
; RV32XTHEADBB-NEXT: sll t3, a0, t2
; RV32XTHEADBB-NEXT: srl a0, a0, t5
; RV32XTHEADBB-NEXT: sll t6, a1, t2
; RV32XTHEADBB-NEXT: or a0, a0, t6
-; RV32XTHEADBB-NEXT: beqz t1, .LBB23_11
-; RV32XTHEADBB-NEXT: .LBB23_10:
-; RV32XTHEADBB-NEXT: mv a1, a0
+; RV32XTHEADBB-NEXT: beqz t1, .LBB23_12
; RV32XTHEADBB-NEXT: .LBB23_11:
-; RV32XTHEADBB-NEXT: bltu t0, a6, .LBB23_13
-; RV32XTHEADBB-NEXT: # %bb.12:
+; RV32XTHEADBB-NEXT: mv a1, a0
+; RV32XTHEADBB-NEXT: .LBB23_12:
+; RV32XTHEADBB-NEXT: bltu t0, a6, .LBB23_14
+; RV32XTHEADBB-NEXT: # %bb.13:
; RV32XTHEADBB-NEXT: srl t4, a3, t0
-; RV32XTHEADBB-NEXT: mv a0, a2
-; RV32XTHEADBB-NEXT: bnez t0, .LBB23_14
; RV32XTHEADBB-NEXT: j .LBB23_15
-; RV32XTHEADBB-NEXT: .LBB23_13:
+; RV32XTHEADBB-NEXT: .LBB23_14:
; RV32XTHEADBB-NEXT: srl a0, a2, a4
; RV32XTHEADBB-NEXT: sll t4, a3, t4
; RV32XTHEADBB-NEXT: or t4, a0, t4
-; RV32XTHEADBB-NEXT: mv a0, a2
-; RV32XTHEADBB-NEXT: beqz t0, .LBB23_15
-; RV32XTHEADBB-NEXT: .LBB23_14:
-; RV32XTHEADBB-NEXT: mv a0, t4
; RV32XTHEADBB-NEXT: .LBB23_15:
-; RV32XTHEADBB-NEXT: bltu t0, a6, .LBB23_18
+; RV32XTHEADBB-NEXT: mv a0, a2
+; RV32XTHEADBB-NEXT: beqz t0, .LBB23_17
; RV32XTHEADBB-NEXT: # %bb.16:
-; RV32XTHEADBB-NEXT: li a4, 0
-; RV32XTHEADBB-NEXT: bgeu t1, a6, .LBB23_19
+; RV32XTHEADBB-NEXT: mv a0, t4
; RV32XTHEADBB-NEXT: .LBB23_17:
+; RV32XTHEADBB-NEXT: bltu t0, a6, .LBB23_20
+; RV32XTHEADBB-NEXT: # %bb.18:
+; RV32XTHEADBB-NEXT: li a4, 0
+; RV32XTHEADBB-NEXT: bgeu t1, a6, .LBB23_21
+; RV32XTHEADBB-NEXT: .LBB23_19:
; RV32XTHEADBB-NEXT: sll a6, a2, t2
; RV32XTHEADBB-NEXT: srl a2, a2, t5
; RV32XTHEADBB-NEXT: sll t0, a3, t2
; RV32XTHEADBB-NEXT: or a2, a2, t0
-; RV32XTHEADBB-NEXT: j .LBB23_20
-; RV32XTHEADBB-NEXT: .LBB23_18:
+; RV32XTHEADBB-NEXT: j .LBB23_22
+; RV32XTHEADBB-NEXT: .LBB23_20:
; RV32XTHEADBB-NEXT: srl a4, a3, a4
-; RV32XTHEADBB-NEXT: bltu t1, a6, .LBB23_17
-; RV32XTHEADBB-NEXT: .LBB23_19:
+; RV32XTHEADBB-NEXT: bltu t1, a6, .LBB23_19
+; RV32XTHEADBB-NEXT: .LBB23_21:
; RV32XTHEADBB-NEXT: li a6, 0
; RV32XTHEADBB-NEXT: sll a2, a2, t1
-; RV32XTHEADBB-NEXT: .LBB23_20:
+; RV32XTHEADBB-NEXT: .LBB23_22:
; RV32XTHEADBB-NEXT: or a5, a5, t3
; RV32XTHEADBB-NEXT: or a1, a7, a1
-; RV32XTHEADBB-NEXT: beqz t1, .LBB23_22
-; RV32XTHEADBB-NEXT: # %bb.21:
+; RV32XTHEADBB-NEXT: beqz t1, .LBB23_24
+; RV32XTHEADBB-NEXT: # %bb.23:
; RV32XTHEADBB-NEXT: mv a3, a2
-; RV32XTHEADBB-NEXT: .LBB23_22:
+; RV32XTHEADBB-NEXT: .LBB23_24:
; RV32XTHEADBB-NEXT: or a2, a0, a6
; RV32XTHEADBB-NEXT: or a3, a4, a3
; RV32XTHEADBB-NEXT: add a0, a5, a2
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
index ababec16f7f8f..87dda43a09020 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
@@ -219,44 +219,43 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: bltu a5, a4, .LBB9_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: srl a6, a1, a5
-; CHECK-NEXT: mv a3, a0
-; CHECK-NEXT: bnez a5, .LBB9_3
-; CHECK-NEXT: j .LBB9_4
+; CHECK-NEXT: j .LBB9_3
; CHECK-NEXT: .LBB9_2:
; CHECK-NEXT: srl a3, a0, a2
; CHECK-NEXT: neg a6, a5
; CHECK-NEXT: sll a6, a1, a6
; CHECK-NEXT: or a6, a3, a6
-; CHECK-NEXT: mv a3, a0
-; CHECK-NEXT: beqz a5, .LBB9_4
; CHECK-NEXT: .LBB9_3:
+; CHECK-NEXT: mv a3, a0
+; CHECK-NEXT: beqz a5, .LBB9_5
+; CHECK-NEXT: # %bb.4:
; CHECK-NEXT: mv a3, a6
-; CHECK-NEXT: .LBB9_4:
+; CHECK-NEXT: .LBB9_5:
; CHECK-NEXT: neg a6, a2
-; CHECK-NEXT: bltu a5, a4, .LBB9_7
-; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: bltu a5, a4, .LBB9_9
+; CHECK-NEXT: # %bb.6:
; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: .LBB9_7:
; CHECK-NEXT: andi a5, a6, 63
-; CHECK-NEXT: bgeu a5, a4, .LBB9_8
-; CHECK-NEXT: .LBB9_6:
+; CHECK-NEXT: bgeu a5, a4, .LBB9_10
+; CHECK-NEXT: # %bb.8:
; CHECK-NEXT: sll a4, a0, a6
; CHECK-NEXT: neg a7, a5
; CHECK-NEXT: srl a0, a0, a7
; CHECK-NEXT: sll a6, a1, a6
; CHECK-NEXT: or a0, a0, a6
-; CHECK-NEXT: bnez a5, .LBB9_9
-; CHECK-NEXT: j .LBB9_10
-; CHECK-NEXT: .LBB9_7:
+; CHECK-NEXT: bnez a5, .LBB9_11
+; CHECK-NEXT: j .LBB9_12
+; CHECK-NEXT: .LBB9_9:
; CHECK-NEXT: srl a2, a1, a2
-; CHECK-NEXT: andi a5, a6, 63
-; CHECK-NEXT: bltu a5, a4, .LBB9_6
-; CHECK-NEXT: .LBB9_8:
+; CHECK-NEXT: j .LBB9_7
+; CHECK-NEXT: .LBB9_10:
; CHECK-NEXT: li a4, 0
; CHECK-NEXT: sll a0, a0, a5
-; CHECK-NEXT: beqz a5, .LBB9_10
-; CHECK-NEXT: .LBB9_9:
+; CHECK-NEXT: beqz a5, .LBB9_12
+; CHECK-NEXT: .LBB9_11:
; CHECK-NEXT: mv a1, a0
-; CHECK-NEXT: .LBB9_10:
+; CHECK-NEXT: .LBB9_12:
; CHECK-NEXT: or a0, a3, a4
; CHECK-NEXT: or a1, a2, a1
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
index 95af7861d4798..74ec7308cb646 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
@@ -357,11 +357,6 @@ define i64 @ctpop_i64(i64 %a) nounwind {
define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64_ugt_two:
; RV32I: # %bb.0:
-; RV32I-NEXT: j .LBB6_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltiu a0, zero, 0
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB6_2:
; RV32I-NEXT: srli a2, a0, 1
; RV32I-NEXT: lui a3, 349525
; RV32I-NEXT: lui a4, 209715
@@ -404,11 +399,6 @@ define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
;
; RV32ZBB-LABEL: ctpop_i64_ugt_two:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: j .LBB6_2
-; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltiu a0, zero, 0
-; RV32ZBB-NEXT: ret
-; RV32ZBB-NEXT: .LBB6_2:
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: add a0, a1, a0
@@ -422,11 +412,6 @@ define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64_ugt_one:
; RV32I: # %bb.0:
-; RV32I-NEXT: j .LBB7_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: snez a0, zero
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB7_2:
; RV32I-NEXT: srli a2, a0, 1
; RV32I-NEXT: lui a3, 349525
; RV32I-NEXT: lui a4, 209715
@@ -470,11 +455,6 @@ define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
;
; RV32ZBB-LABEL: ctpop_i64_ugt_one:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: j .LBB7_2
-; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: snez a0, zero
-; RV32ZBB-NEXT: ret
-; RV32ZBB-NEXT: .LBB7_2:
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: add a0, a1, a0
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll
index 8b262db56ccd2..15faf278080e3 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll
@@ -221,95 +221,94 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: bltu a2, t0, .LBB6_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srl a5, a7, a2
-; RV32I-NEXT: mv a4, a3
-; RV32I-NEXT: bnez a2, .LBB6_3
-; RV32I-NEXT: j .LBB6_4
+; RV32I-NEXT: j .LBB6_3
; RV32I-NEXT: .LBB6_2:
; RV32I-NEXT: or a5, t2, t5
-; RV32I-NEXT: mv a4, a3
-; RV32I-NEXT: beqz a2, .LBB6_4
; RV32I-NEXT: .LBB6_3:
+; RV32I-NEXT: mv a4, a3
+; RV32I-NEXT: beqz a2, .LBB6_5
+; RV32I-NEXT: # %bb.4:
; RV32I-NEXT: mv a4, a5
-; RV32I-NEXT: .LBB6_4:
+; RV32I-NEXT: .LBB6_5:
; RV32I-NEXT: lw a5, 0(a1)
; RV32I-NEXT: lw a1, 4(a1)
-; RV32I-NEXT: bltu a2, t0, .LBB6_6
-; RV32I-NEXT: # %bb.5:
+; RV32I-NEXT: bltu a2, t0, .LBB6_7
+; RV32I-NEXT: # %bb.6:
; RV32I-NEXT: li a6, 0
; RV32I-NEXT: srl t4, a1, a2
-; RV32I-NEXT: j .LBB6_7
-; RV32I-NEXT: .LBB6_6:
+; RV32I-NEXT: j .LBB6_8
+; RV32I-NEXT: .LBB6_7:
; RV32I-NEXT: srl a6, a7, a2
; RV32I-NEXT: srl t1, a5, a2
; RV32I-NEXT: sll t3, a1, t6
; RV32I-NEXT: or t4, t1, t3
-; RV32I-NEXT: .LBB6_7:
+; RV32I-NEXT: .LBB6_8:
; RV32I-NEXT: li t1, 64
; RV32I-NEXT: mv t3, a5
-; RV32I-NEXT: beqz a2, .LBB6_9
-; RV32I-NEXT: # %bb.8:
+; RV32I-NEXT: beqz a2, .LBB6_10
+; RV32I-NEXT: # %bb.9:
; RV32I-NEXT: mv t3, t4
-; RV32I-NEXT: .LBB6_9:
+; RV32I-NEXT: .LBB6_10:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: sub s0, t1, a2
-; RV32I-NEXT: bltu a2, t0, .LBB6_12
-; RV32I-NEXT: # %bb.10:
+; RV32I-NEXT: bltu a2, t0, .LBB6_13
+; RV32I-NEXT: # %bb.11:
; RV32I-NEXT: li t4, 0
-; RV32I-NEXT: bgeu s0, t0, .LBB6_13
-; RV32I-NEXT: .LBB6_11:
+; RV32I-NEXT: bgeu s0, t0, .LBB6_14
+; RV32I-NEXT: .LBB6_12:
; RV32I-NEXT: sll t6, a3, t6
; RV32I-NEXT: neg s1, s0
; RV32I-NEXT: srl s1, a3, s1
; RV32I-NEXT: or s2, s1, t5
-; RV32I-NEXT: j .LBB6_14
-; RV32I-NEXT: .LBB6_12:
-; RV32I-NEXT: srl t4, a1, a2
-; RV32I-NEXT: bltu s0, t0, .LBB6_11
+; RV32I-NEXT: j .LBB6_15
; RV32I-NEXT: .LBB6_13:
+; RV32I-NEXT: srl t4, a1, a2
+; RV32I-NEXT: bltu s0, t0, .LBB6_12
+; RV32I-NEXT: .LBB6_14:
; RV32I-NEXT: li t6, 0
; RV32I-NEXT: sll s2, a3, s0
-; RV32I-NEXT: .LBB6_14:
+; RV32I-NEXT: .LBB6_15:
; RV32I-NEXT: addi s1, a2, -64
; RV32I-NEXT: mv t5, a7
-; RV32I-NEXT: beqz s0, .LBB6_16
-; RV32I-NEXT: # %bb.15:
+; RV32I-NEXT: beqz s0, .LBB6_17
+; RV32I-NEXT: # %bb.16:
; RV32I-NEXT: mv t5, s2
-; RV32I-NEXT: .LBB6_16:
-; RV32I-NEXT: bltu s1, t0, .LBB6_18
-; RV32I-NEXT: # %bb.17:
+; RV32I-NEXT: .LBB6_17:
+; RV32I-NEXT: bltu s1, t0, .LBB6_19
+; RV32I-NEXT: # %bb.18:
; RV32I-NEXT: srl t2, a7, s1
-; RV32I-NEXT: bnez s1, .LBB6_19
-; RV32I-NEXT: j .LBB6_20
-; RV32I-NEXT: .LBB6_18:
+; RV32I-NEXT: bnez s1, .LBB6_20
+; RV32I-NEXT: j .LBB6_21
+; RV32I-NEXT: .LBB6_19:
; RV32I-NEXT: neg s0, s1
; RV32I-NEXT: sll s0, a7, s0
; RV32I-NEXT: or t2, t2, s0
-; RV32I-NEXT: beqz s1, .LBB6_20
-; RV32I-NEXT: .LBB6_19:
-; RV32I-NEXT: mv a3, t2
+; RV32I-NEXT: beqz s1, .LBB6_21
; RV32I-NEXT: .LBB6_20:
-; RV32I-NEXT: bltu s1, t0, .LBB6_22
-; RV32I-NEXT: # %bb.21:
+; RV32I-NEXT: mv a3, t2
+; RV32I-NEXT: .LBB6_21:
+; RV32I-NEXT: bltu s1, t0, .LBB6_23
+; RV32I-NEXT: # %bb.22:
; RV32I-NEXT: li a7, 0
-; RV32I-NEXT: bltu a2, t1, .LBB6_23
-; RV32I-NEXT: j .LBB6_24
-; RV32I-NEXT: .LBB6_22:
-; RV32I-NEXT: srl a7, a7, a2
-; RV32I-NEXT: bgeu a2, t1, .LBB6_24
+; RV32I-NEXT: bltu a2, t1, .LBB6_24
+; RV32I-NEXT: j .LBB6_25
; RV32I-NEXT: .LBB6_23:
+; RV32I-NEXT: srl a7, a7, a2
+; RV32I-NEXT: bgeu a2, t1, .LBB6_25
+; RV32I-NEXT: .LBB6_24:
; RV32I-NEXT: or a3, t3, t6
; RV32I-NEXT: or a7, t4, t5
-; RV32I-NEXT: .LBB6_24:
-; RV32I-NEXT: bnez a2, .LBB6_28
-; RV32I-NEXT: # %bb.25:
-; RV32I-NEXT: bltu a2, t1, .LBB6_27
-; RV32I-NEXT: .LBB6_26:
+; RV32I-NEXT: .LBB6_25:
+; RV32I-NEXT: bnez a2, .LBB6_29
+; RV32I-NEXT: # %bb.26:
+; RV32I-NEXT: bltu a2, t1, .LBB6_28
+; RV32I-NEXT: .LBB6_27:
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a6, 0
-; RV32I-NEXT: .LBB6_27:
+; RV32I-NEXT: .LBB6_28:
; RV32I-NEXT: sw a5, 0(a0)
; RV32I-NEXT: sw a1, 4(a0)
; RV32I-NEXT: sw a4, 8(a0)
@@ -319,11 +318,11 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB6_28:
+; RV32I-NEXT: .LBB6_29:
; RV32I-NEXT: mv a5, a3
; RV32I-NEXT: mv a1, a7
-; RV32I-NEXT: bgeu a2, t1, .LBB6_26
-; RV32I-NEXT: j .LBB6_27
+; RV32I-NEXT: bgeu a2, t1, .LBB6_27
+; RV32I-NEXT: j .LBB6_28
;
; RV64I-LABEL: lshr128:
; RV64I: # %bb.0:
@@ -367,95 +366,94 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: bltu a2, t0, .LBB7_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sra a6, a3, a2
-; RV32I-NEXT: mv a5, a4
-; RV32I-NEXT: bnez a2, .LBB7_3
-; RV32I-NEXT: j .LBB7_4
+; RV32I-NEXT: j .LBB7_3
; RV32I-NEXT: .LBB7_2:
; RV32I-NEXT: or a6, t2, t5
-; RV32I-NEXT: mv a5, a4
-; RV32I-NEXT: beqz a2, .LBB7_4
; RV32I-NEXT: .LBB7_3:
+; RV32I-NEXT: mv a5, a4
+; RV32I-NEXT: beqz a2, .LBB7_5
+; RV32I-NEXT: # %bb.4:
; RV32I-NEXT: mv a5, a6
-; RV32I-NEXT: .LBB7_4:
+; RV32I-NEXT: .LBB7_5:
; RV32I-NEXT: lw a6, 0(a1)
; RV32I-NEXT: lw a1, 4(a1)
-; RV32I-NEXT: bltu a2, t0, .LBB7_6
-; RV32I-NEXT: # %bb.5:
+; RV32I-NEXT: bltu a2, t0, .LBB7_7
+; RV32I-NEXT: # %bb.6:
; RV32I-NEXT: srai a7, a3, 31
; RV32I-NEXT: srl t4, a1, a2
-; RV32I-NEXT: j .LBB7_7
-; RV32I-NEXT: .LBB7_6:
+; RV32I-NEXT: j .LBB7_8
+; RV32I-NEXT: .LBB7_7:
; RV32I-NEXT: sra a7, a3, a2
; RV32I-NEXT: srl t1, a6, a2
; RV32I-NEXT: sll t3, a1, t6
; RV32I-NEXT: or t4, t1, t3
-; RV32I-NEXT: .LBB7_7:
+; RV32I-NEXT: .LBB7_8:
; RV32I-NEXT: li t1, 64
; RV32I-NEXT: mv t3, a6
-; RV32I-NEXT: beqz a2, .LBB7_9
-; RV32I-NEXT: # %bb.8:
+; RV32I-NEXT: beqz a2, .LBB7_10
+; RV32I-NEXT: # %bb.9:
; RV32I-NEXT: mv t3, t4
-; RV32I-NEXT: .LBB7_9:
+; RV32I-NEXT: .LBB7_10:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: sub s0, t1, a2
-; RV32I-NEXT: bltu a2, t0, .LBB7_12
-; RV32I-NEXT: # %bb.10:
+; RV32I-NEXT: bltu a2, t0, .LBB7_13
+; RV32I-NEXT: # %bb.11:
; RV32I-NEXT: li t4, 0
-; RV32I-NEXT: bgeu s0, t0, .LBB7_13
-; RV32I-NEXT: .LBB7_11:
+; RV32I-NEXT: bgeu s0, t0, .LBB7_14
+; RV32I-NEXT: .LBB7_12:
; RV32I-NEXT: sll t6, a4, t6
; RV32I-NEXT: neg s1, s0
; RV32I-NEXT: srl s1, a4, s1
; RV32I-NEXT: or s2, s1, t5
-; RV32I-NEXT: j .LBB7_14
-; RV32I-NEXT: .LBB7_12:
-; RV32I-NEXT: srl t4, a1, a2
-; RV32I-NEXT: bltu s0, t0, .LBB7_11
+; RV32I-NEXT: j .LBB7_15
; RV32I-NEXT: .LBB7_13:
+; RV32I-NEXT: srl t4, a1, a2
+; RV32I-NEXT: bltu s0, t0, .LBB7_12
+; RV32I-NEXT: .LBB7_14:
; RV32I-NEXT: li t6, 0
; RV32I-NEXT: sll s2, a4, s0
-; RV32I-NEXT: .LBB7_14:
+; RV32I-NEXT: .LBB7_15:
; RV32I-NEXT: addi s1, a2, -64
; RV32I-NEXT: mv t5, a3
-; RV32I-NEXT: beqz s0, .LBB7_16
-; RV32I-NEXT: # %bb.15:
+; RV32I-NEXT: beqz s0, .LBB7_17
+; RV32I-NEXT: # %bb.16:
; RV32I-NEXT: mv t5, s2
-; RV32I-NEXT: .LBB7_16:
-; RV32I-NEXT: bltu s1, t0, .LBB7_18
-; RV32I-NEXT: # %bb.17:
+; RV32I-NEXT: .LBB7_17:
+; RV32I-NEXT: bltu s1, t0, .LBB7_19
+; RV32I-NEXT: # %bb.18:
; RV32I-NEXT: sra t2, a3, s1
-; RV32I-NEXT: bnez s1, .LBB7_19
-; RV32I-NEXT: j .LBB7_20
-; RV32I-NEXT: .LBB7_18:
+; RV32I-NEXT: bnez s1, .LBB7_20
+; RV32I-NEXT: j .LBB7_21
+; RV32I-NEXT: .LBB7_19:
; RV32I-NEXT: neg s0, s1
; RV32I-NEXT: sll s0, a3, s0
; RV32I-NEXT: or t2, t2, s0
-; RV32I-NEXT: beqz s1, .LBB7_20
-; RV32I-NEXT: .LBB7_19:
-; RV32I-NEXT: mv a4, t2
+; RV32I-NEXT: beqz s1, .LBB7_21
; RV32I-NEXT: .LBB7_20:
-; RV32I-NEXT: bltu s1, t0, .LBB7_22
-; RV32I-NEXT: # %bb.21:
+; RV32I-NEXT: mv a4, t2
+; RV32I-NEXT: .LBB7_21:
+; RV32I-NEXT: bltu s1, t0, .LBB7_23
+; RV32I-NEXT: # %bb.22:
; RV32I-NEXT: srai t0, a3, 31
-; RV32I-NEXT: bltu a2, t1, .LBB7_23
-; RV32I-NEXT: j .LBB7_24
-; RV32I-NEXT: .LBB7_22:
-; RV32I-NEXT: sra t0, a3, a2
-; RV32I-NEXT: bgeu a2, t1, .LBB7_24
+; RV32I-NEXT: bltu a2, t1, .LBB7_24
+; RV32I-NEXT: j .LBB7_25
; RV32I-NEXT: .LBB7_23:
+; RV32I-NEXT: sra t0, a3, a2
+; RV32I-NEXT: bgeu a2, t1, .LBB7_25
+; RV32I-NEXT: .LBB7_24:
; RV32I-NEXT: or a4, t3, t6
; RV32I-NEXT: or t0, t4, t5
-; RV32I-NEXT: .LBB7_24:
-; RV32I-NEXT: bnez a2, .LBB7_28
-; RV32I-NEXT: # %bb.25:
-; RV32I-NEXT: bltu a2, t1, .LBB7_27
-; RV32I-NEXT: .LBB7_26:
+; RV32I-NEXT: .LBB7_25:
+; RV32I-NEXT: bnez a2, .LBB7_29
+; RV32I-NEXT: # %bb.26:
+; RV32I-NEXT: bltu a2, t1, .LBB7_28
+; RV32I-NEXT: .LBB7_27:
; RV32I-NEXT: srai a5, a3, 31
; RV32I-NEXT: mv a7, a5
-; RV32I-NEXT: .LBB7_27:
+; RV32I-NEXT: .LBB7_28:
; RV32I-NEXT: sw a6, 0(a0)
; RV32I-NEXT: sw a1, 4(a0)
; RV32I-NEXT: sw a5, 8(a0)
@@ -465,11 +463,11 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB7_28:
+; RV32I-NEXT: .LBB7_29:
; RV32I-NEXT: mv a6, a4
; RV32I-NEXT: mv a1, t0
-; RV32I-NEXT: bgeu a2, t1, .LBB7_26
-; RV32I-NEXT: j .LBB7_27
+; RV32I-NEXT: bgeu a2, t1, .LBB7_27
+; RV32I-NEXT: j .LBB7_28
;
; RV64I-LABEL: ashr128:
; RV64I: # %bb.0:
@@ -529,76 +527,75 @@ define i128 @shl128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: bltu t4, t1, .LBB8_7
; RV32I-NEXT: # %bb.6:
; RV32I-NEXT: srl t2, a3, t4
-; RV32I-NEXT: mv t3, a7
-; RV32I-NEXT: bnez t4, .LBB8_8
-; RV32I-NEXT: j .LBB8_9
+; RV32I-NEXT: j .LBB8_8
; RV32I-NEXT: .LBB8_7:
; RV32I-NEXT: neg t3, t4
; RV32I-NEXT: sll t3, a3, t3
; RV32I-NEXT: or t2, t2, t3
-; RV32I-NEXT: mv t3, a7
-; RV32I-NEXT: beqz t4, .LBB8_9
; RV32I-NEXT: .LBB8_8:
+; RV32I-NEXT: mv t3, a7
+; RV32I-NEXT: beqz t4, .LBB8_10
+; RV32I-NEXT: # %bb.9:
; RV32I-NEXT: mv t3, t2
-; RV32I-NEXT: .LBB8_9:
-; RV32I-NEXT: bltu t4, t1, .LBB8_11
-; RV32I-NEXT: # %bb.10:
+; RV32I-NEXT: .LBB8_10:
+; RV32I-NEXT: bltu t4, t1, .LBB8_12
+; RV32I-NEXT: # %bb.11:
; RV32I-NEXT: li t4, 0
-; RV32I-NEXT: j .LBB8_12
-; RV32I-NEXT: .LBB8_11:
-; RV32I-NEXT: srl t4, a3, t5
+; RV32I-NEXT: j .LBB8_13
; RV32I-NEXT: .LBB8_12:
+; RV32I-NEXT: srl t4, a3, t5
+; RV32I-NEXT: .LBB8_13:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: lw t2, 8(a1)
; RV32I-NEXT: lw a1, 12(a1)
-; RV32I-NEXT: bltu a2, t1, .LBB8_14
-; RV32I-NEXT: # %bb.13:
+; RV32I-NEXT: bltu a2, t1, .LBB8_15
+; RV32I-NEXT: # %bb.14:
; RV32I-NEXT: li t6, 0
; RV32I-NEXT: sll s1, t2, a2
-; RV32I-NEXT: j .LBB8_15
-; RV32I-NEXT: .LBB8_14:
+; RV32I-NEXT: j .LBB8_16
+; RV32I-NEXT: .LBB8_15:
; RV32I-NEXT: sll t6, t2, a2
; RV32I-NEXT: srl t5, t2, t5
; RV32I-NEXT: sll s0, a1, a2
; RV32I-NEXT: or s1, t5, s0
-; RV32I-NEXT: .LBB8_15:
+; RV32I-NEXT: .LBB8_16:
; RV32I-NEXT: addi s0, a2, -64
; RV32I-NEXT: mv t5, a1
-; RV32I-NEXT: beqz a2, .LBB8_17
-; RV32I-NEXT: # %bb.16:
+; RV32I-NEXT: beqz a2, .LBB8_18
+; RV32I-NEXT: # %bb.17:
; RV32I-NEXT: mv t5, s1
-; RV32I-NEXT: .LBB8_17:
-; RV32I-NEXT: bltu s0, t1, .LBB8_19
-; RV32I-NEXT: # %bb.18:
+; RV32I-NEXT: .LBB8_18:
+; RV32I-NEXT: bltu s0, t1, .LBB8_20
+; RV32I-NEXT: # %bb.19:
; RV32I-NEXT: li t1, 0
; RV32I-NEXT: sll a7, a7, s0
-; RV32I-NEXT: bnez s0, .LBB8_20
-; RV32I-NEXT: j .LBB8_21
-; RV32I-NEXT: .LBB8_19:
+; RV32I-NEXT: bnez s0, .LBB8_21
+; RV32I-NEXT: j .LBB8_22
+; RV32I-NEXT: .LBB8_20:
; RV32I-NEXT: sll t1, a7, a2
; RV32I-NEXT: neg s1, s0
; RV32I-NEXT: srl a7, a7, s1
; RV32I-NEXT: or a7, a7, t0
-; RV32I-NEXT: beqz s0, .LBB8_21
-; RV32I-NEXT: .LBB8_20:
-; RV32I-NEXT: mv a3, a7
+; RV32I-NEXT: beqz s0, .LBB8_22
; RV32I-NEXT: .LBB8_21:
-; RV32I-NEXT: bltu a2, a6, .LBB8_23
-; RV32I-NEXT: # %bb.22:
+; RV32I-NEXT: mv a3, a7
+; RV32I-NEXT: .LBB8_22:
+; RV32I-NEXT: bltu a2, a6, .LBB8_24
+; RV32I-NEXT: # %bb.23:
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
-; RV32I-NEXT: bnez a2, .LBB8_24
-; RV32I-NEXT: j .LBB8_25
-; RV32I-NEXT: .LBB8_23:
+; RV32I-NEXT: bnez a2, .LBB8_25
+; RV32I-NEXT: j .LBB8_26
+; RV32I-NEXT: .LBB8_24:
; RV32I-NEXT: or t1, t3, t6
; RV32I-NEXT: or a3, t4, t5
-; RV32I-NEXT: beqz a2, .LBB8_25
-; RV32I-NEXT: .LBB8_24:
+; RV32I-NEXT: beqz a2, .LBB8_26
+; RV32I-NEXT: .LBB8_25:
; RV32I-NEXT: mv t2, t1
; RV32I-NEXT: mv a1, a3
-; RV32I-NEXT: .LBB8_25:
+; RV32I-NEXT: .LBB8_26:
; RV32I-NEXT: sw a4, 0(a0)
; RV32I-NEXT: sw a5, 4(a0)
; RV32I-NEXT: sw t2, 8(a0)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll
index bc002fee4417c..fd9fb326990a3 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll
@@ -3210,278 +3210,271 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: # %bb.38:
; RV32I-NEXT: li s8, 0
; RV32I-NEXT: srl a3, a0, a4
-; RV32I-NEXT: mv a5, s0
-; RV32I-NEXT: bnez a4, .LBB12_40
-; RV32I-NEXT: j .LBB12_41
+; RV32I-NEXT: j .LBB12_40
; RV32I-NEXT: .LBB12_39:
; RV32I-NEXT: srl s8, t2, a4
; RV32I-NEXT: srl a3, s0, a4
; RV32I-NEXT: sll a5, a0, s6
; RV32I-NEXT: or a3, a3, a5
-; RV32I-NEXT: mv a5, s0
-; RV32I-NEXT: beqz a4, .LBB12_41
; RV32I-NEXT: .LBB12_40:
+; RV32I-NEXT: mv a5, s0
+; RV32I-NEXT: beqz a4, .LBB12_42
+; RV32I-NEXT: # %bb.41:
; RV32I-NEXT: mv a5, a3
-; RV32I-NEXT: .LBB12_41:
-; RV32I-NEXT: bltu a4, t3, .LBB12_44
-; RV32I-NEXT: # %bb.42:
+; RV32I-NEXT: .LBB12_42:
+; RV32I-NEXT: bltu a4, t3, .LBB12_45
+; RV32I-NEXT: # %bb.43:
; RV32I-NEXT: li s1, 0
-; RV32I-NEXT: bgeu s7, t3, .LBB12_45
-; RV32I-NEXT: .LBB12_43:
+; RV32I-NEXT: bgeu s7, t3, .LBB12_46
+; RV32I-NEXT: .LBB12_44:
; RV32I-NEXT: sll s3, t4, s6
; RV32I-NEXT: srl a3, t4, s10
; RV32I-NEXT: or a3, a3, ra
-; RV32I-NEXT: mv s10, t2
-; RV32I-NEXT: bnez s7, .LBB12_46
; RV32I-NEXT: j .LBB12_47
-; RV32I-NEXT: .LBB12_44:
-; RV32I-NEXT: srl s1, a0, a4
-; RV32I-NEXT: bltu s7, t3, .LBB12_43
; RV32I-NEXT: .LBB12_45:
+; RV32I-NEXT: srl s1, a0, a4
+; RV32I-NEXT: bltu s7, t3, .LBB12_44
+; RV32I-NEXT: .LBB12_46:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t4, s7
-; RV32I-NEXT: mv s10, t2
-; RV32I-NEXT: beqz s7, .LBB12_47
-; RV32I-NEXT: .LBB12_46:
-; RV32I-NEXT: mv s10, a3
; RV32I-NEXT: .LBB12_47:
-; RV32I-NEXT: bltu s9, t3, .LBB12_49
+; RV32I-NEXT: mv s10, t2
+; RV32I-NEXT: beqz s7, .LBB12_49
; RV32I-NEXT: # %bb.48:
-; RV32I-NEXT: srl a3, t2, s9
-; RV32I-NEXT: mv s2, t4
-; RV32I-NEXT: bnez s9, .LBB12_50
-; RV32I-NEXT: j .LBB12_51
+; RV32I-NEXT: mv s10, a3
; RV32I-NEXT: .LBB12_49:
+; RV32I-NEXT: bltu s9, t3, .LBB12_51
+; RV32I-NEXT: # %bb.50:
+; RV32I-NEXT: srl a3, t2, s9
+; RV32I-NEXT: j .LBB12_52
+; RV32I-NEXT: .LBB12_51:
; RV32I-NEXT: sll a3, t2, s11
; RV32I-NEXT: or a3, s2, a3
+; RV32I-NEXT: .LBB12_52:
; RV32I-NEXT: mv s2, t4
-; RV32I-NEXT: beqz s9, .LBB12_51
-; RV32I-NEXT: .LBB12_50:
+; RV32I-NEXT: beqz s9, .LBB12_54
+; RV32I-NEXT: # %bb.53:
; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: .LBB12_51:
-; RV32I-NEXT: bltu s9, t3, .LBB12_53
-; RV32I-NEXT: # %bb.52:
+; RV32I-NEXT: .LBB12_54:
+; RV32I-NEXT: bltu s9, t3, .LBB12_56
+; RV32I-NEXT: # %bb.55:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: bltu a4, t6, .LBB12_54
-; RV32I-NEXT: j .LBB12_55
-; RV32I-NEXT: .LBB12_53:
+; RV32I-NEXT: bltu a4, t6, .LBB12_57
+; RV32I-NEXT: j .LBB12_58
+; RV32I-NEXT: .LBB12_56:
; RV32I-NEXT: srl s7, t2, a4
-; RV32I-NEXT: bgeu a4, t6, .LBB12_55
-; RV32I-NEXT: .LBB12_54:
+; RV32I-NEXT: bgeu a4, t6, .LBB12_58
+; RV32I-NEXT: .LBB12_57:
; RV32I-NEXT: or s2, a5, s3
; RV32I-NEXT: or s7, s1, s10
-; RV32I-NEXT: .LBB12_55:
+; RV32I-NEXT: .LBB12_58:
; RV32I-NEXT: li a3, 128
; RV32I-NEXT: mv a5, s0
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: beqz a4, .LBB12_57
-; RV32I-NEXT: # %bb.56:
+; RV32I-NEXT: beqz a4, .LBB12_60
+; RV32I-NEXT: # %bb.59:
; RV32I-NEXT: mv a5, s2
; RV32I-NEXT: mv s1, s7
-; RV32I-NEXT: .LBB12_57:
+; RV32I-NEXT: .LBB12_60:
; RV32I-NEXT: sw a5, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sub s2, a3, a4
-; RV32I-NEXT: bltu a4, t6, .LBB12_59
-; RV32I-NEXT: # %bb.58:
+; RV32I-NEXT: bltu a4, t6, .LBB12_62
+; RV32I-NEXT: # %bb.61:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: li s8, 0
-; RV32I-NEXT: .LBB12_59:
+; RV32I-NEXT: .LBB12_62:
; RV32I-NEXT: neg s3, s2
; RV32I-NEXT: srl a5, t0, s3
; RV32I-NEXT: sw s8, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s2, t3, .LBB12_61
-; RV32I-NEXT: # %bb.60:
+; RV32I-NEXT: bltu s2, t3, .LBB12_64
+; RV32I-NEXT: # %bb.63:
; RV32I-NEXT: li s10, 0
; RV32I-NEXT: sll a3, t0, s2
-; RV32I-NEXT: j .LBB12_62
-; RV32I-NEXT: .LBB12_61:
+; RV32I-NEXT: j .LBB12_65
+; RV32I-NEXT: .LBB12_64:
; RV32I-NEXT: sll s10, t0, s6
; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a5, a3
-; RV32I-NEXT: .LBB12_62:
+; RV32I-NEXT: .LBB12_65:
; RV32I-NEXT: sub s1, t6, s2
; RV32I-NEXT: mv s8, a6
-; RV32I-NEXT: beqz s2, .LBB12_64
-; RV32I-NEXT: # %bb.63:
+; RV32I-NEXT: beqz s2, .LBB12_67
+; RV32I-NEXT: # %bb.66:
; RV32I-NEXT: mv s8, a3
-; RV32I-NEXT: .LBB12_64:
-; RV32I-NEXT: bltu s1, t3, .LBB12_66
-; RV32I-NEXT: # %bb.65:
+; RV32I-NEXT: .LBB12_67:
+; RV32I-NEXT: bltu s1, t3, .LBB12_69
+; RV32I-NEXT: # %bb.68:
; RV32I-NEXT: srl a3, a6, s1
-; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: bnez s1, .LBB12_67
-; RV32I-NEXT: j .LBB12_68
-; RV32I-NEXT: .LBB12_66:
+; RV32I-NEXT: j .LBB12_70
+; RV32I-NEXT: .LBB12_69:
; RV32I-NEXT: neg a3, s1
; RV32I-NEXT: sll a3, a6, a3
; RV32I-NEXT: or a3, a5, a3
+; RV32I-NEXT: .LBB12_70:
; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: beqz s1, .LBB12_68
-; RV32I-NEXT: .LBB12_67:
+; RV32I-NEXT: beqz s1, .LBB12_72
+; RV32I-NEXT: # %bb.71:
; RV32I-NEXT: mv a5, a3
-; RV32I-NEXT: .LBB12_68:
-; RV32I-NEXT: bltu s1, t3, .LBB12_71
-; RV32I-NEXT: # %bb.69:
+; RV32I-NEXT: .LBB12_72:
+; RV32I-NEXT: bltu s1, t3, .LBB12_76
+; RV32I-NEXT: # %bb.73:
; RV32I-NEXT: li s1, 0
+; RV32I-NEXT: .LBB12_74:
; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bgeu s2, t3, .LBB12_72
-; RV32I-NEXT: .LBB12_70:
+; RV32I-NEXT: bgeu s2, t3, .LBB12_77
+; RV32I-NEXT: # %bb.75:
; RV32I-NEXT: sll s6, t1, s6
; RV32I-NEXT: srl a3, t1, s3
; RV32I-NEXT: lw s3, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: j .LBB12_73
-; RV32I-NEXT: .LBB12_71:
+; RV32I-NEXT: j .LBB12_78
+; RV32I-NEXT: .LBB12_76:
; RV32I-NEXT: srl s1, a6, s3
-; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s2, t3, .LBB12_70
-; RV32I-NEXT: .LBB12_72:
+; RV32I-NEXT: j .LBB12_74
+; RV32I-NEXT: .LBB12_77:
; RV32I-NEXT: li s6, 0
; RV32I-NEXT: sll a3, t1, s2
-; RV32I-NEXT: .LBB12_73:
+; RV32I-NEXT: .LBB12_78:
; RV32I-NEXT: addi s9, s2, -64
; RV32I-NEXT: mv s5, t5
-; RV32I-NEXT: beqz s2, .LBB12_75
-; RV32I-NEXT: # %bb.74:
+; RV32I-NEXT: beqz s2, .LBB12_80
+; RV32I-NEXT: # %bb.79:
; RV32I-NEXT: mv s5, a3
-; RV32I-NEXT: .LBB12_75:
-; RV32I-NEXT: bltu s9, t3, .LBB12_77
-; RV32I-NEXT: # %bb.76:
+; RV32I-NEXT: .LBB12_80:
+; RV32I-NEXT: bltu s9, t3, .LBB12_82
+; RV32I-NEXT: # %bb.81:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t0, s9
-; RV32I-NEXT: mv s7, a6
-; RV32I-NEXT: bnez s9, .LBB12_78
-; RV32I-NEXT: j .LBB12_79
-; RV32I-NEXT: .LBB12_77:
+; RV32I-NEXT: j .LBB12_83
+; RV32I-NEXT: .LBB12_82:
; RV32I-NEXT: sll s3, t0, s2
; RV32I-NEXT: neg a3, s9
; RV32I-NEXT: srl a3, t0, a3
; RV32I-NEXT: sll s7, a6, s2
; RV32I-NEXT: or a3, a3, s7
+; RV32I-NEXT: .LBB12_83:
; RV32I-NEXT: mv s7, a6
-; RV32I-NEXT: beqz s9, .LBB12_79
-; RV32I-NEXT: .LBB12_78:
+; RV32I-NEXT: beqz s9, .LBB12_85
+; RV32I-NEXT: # %bb.84:
; RV32I-NEXT: mv s7, a3
-; RV32I-NEXT: .LBB12_79:
-; RV32I-NEXT: bltu s2, t6, .LBB12_81
-; RV32I-NEXT: # %bb.80:
+; RV32I-NEXT: .LBB12_85:
+; RV32I-NEXT: bltu s2, t6, .LBB12_87
+; RV32I-NEXT: # %bb.86:
; RV32I-NEXT: sw zero, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: li s8, 0
-; RV32I-NEXT: j .LBB12_82
-; RV32I-NEXT: .LBB12_81:
+; RV32I-NEXT: j .LBB12_88
+; RV32I-NEXT: .LBB12_87:
; RV32I-NEXT: sw s10, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: or s3, a5, s6
; RV32I-NEXT: or s7, s1, s5
-; RV32I-NEXT: .LBB12_82:
+; RV32I-NEXT: .LBB12_88:
; RV32I-NEXT: addi ra, a4, -128
; RV32I-NEXT: mv s5, t1
; RV32I-NEXT: mv s6, t5
-; RV32I-NEXT: beqz s2, .LBB12_84
-; RV32I-NEXT: # %bb.83:
+; RV32I-NEXT: beqz s2, .LBB12_90
+; RV32I-NEXT: # %bb.89:
; RV32I-NEXT: mv s5, s3
; RV32I-NEXT: mv s6, s7
-; RV32I-NEXT: .LBB12_84:
+; RV32I-NEXT: .LBB12_90:
; RV32I-NEXT: neg s9, ra
; RV32I-NEXT: sll s3, t5, s9
-; RV32I-NEXT: bltu ra, t3, .LBB12_86
-; RV32I-NEXT: # %bb.85:
+; RV32I-NEXT: bltu ra, t3, .LBB12_92
+; RV32I-NEXT: # %bb.91:
; RV32I-NEXT: srl a3, t5, ra
-; RV32I-NEXT: mv s1, t1
-; RV32I-NEXT: bnez ra, .LBB12_87
-; RV32I-NEXT: j .LBB12_88
-; RV32I-NEXT: .LBB12_86:
+; RV32I-NEXT: j .LBB12_93
+; RV32I-NEXT: .LBB12_92:
; RV32I-NEXT: lw a3, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
+; RV32I-NEXT: .LBB12_93:
; RV32I-NEXT: mv s1, t1
-; RV32I-NEXT: beqz ra, .LBB12_88
-; RV32I-NEXT: .LBB12_87:
+; RV32I-NEXT: beqz ra, .LBB12_95
+; RV32I-NEXT: # %bb.94:
; RV32I-NEXT: mv s1, a3
-; RV32I-NEXT: .LBB12_88:
-; RV32I-NEXT: bltu ra, t3, .LBB12_90
-; RV32I-NEXT: # %bb.89:
+; RV32I-NEXT: .LBB12_95:
+; RV32I-NEXT: bltu ra, t3, .LBB12_97
+; RV32I-NEXT: # %bb.96:
; RV32I-NEXT: li s2, 0
; RV32I-NEXT: srl a3, a6, ra
-; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: bnez ra, .LBB12_91
-; RV32I-NEXT: j .LBB12_92
-; RV32I-NEXT: .LBB12_90:
+; RV32I-NEXT: j .LBB12_98
+; RV32I-NEXT: .LBB12_97:
; RV32I-NEXT: srl s2, t5, a4
; RV32I-NEXT: sll a3, a6, s9
; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a5, a3
+; RV32I-NEXT: .LBB12_98:
; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: beqz ra, .LBB12_92
-; RV32I-NEXT: .LBB12_91:
+; RV32I-NEXT: beqz ra, .LBB12_100
+; RV32I-NEXT: # %bb.99:
; RV32I-NEXT: mv a5, a3
-; RV32I-NEXT: .LBB12_92:
+; RV32I-NEXT: .LBB12_100:
; RV32I-NEXT: sub s10, t6, ra
-; RV32I-NEXT: bltu ra, t3, .LBB12_95
-; RV32I-NEXT: # %bb.93:
+; RV32I-NEXT: bltu ra, t3, .LBB12_103
+; RV32I-NEXT: # %bb.101:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: bgeu s10, t3, .LBB12_96
-; RV32I-NEXT: .LBB12_94:
+; RV32I-NEXT: bgeu s10, t3, .LBB12_104
+; RV32I-NEXT: .LBB12_102:
; RV32I-NEXT: sll s9, t1, s9
; RV32I-NEXT: neg a3, s10
; RV32I-NEXT: srl a3, t1, a3
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: j .LBB12_97
-; RV32I-NEXT: .LBB12_95:
+; RV32I-NEXT: j .LBB12_105
+; RV32I-NEXT: .LBB12_103:
; RV32I-NEXT: srl s7, a6, a4
-; RV32I-NEXT: bltu s10, t3, .LBB12_94
-; RV32I-NEXT: .LBB12_96:
+; RV32I-NEXT: bltu s10, t3, .LBB12_102
+; RV32I-NEXT: .LBB12_104:
; RV32I-NEXT: li s9, 0
; RV32I-NEXT: sll a3, t1, s10
-; RV32I-NEXT: .LBB12_97:
+; RV32I-NEXT: .LBB12_105:
; RV32I-NEXT: addi s11, ra, -64
; RV32I-NEXT: mv s3, t5
-; RV32I-NEXT: beqz s10, .LBB12_99
-; RV32I-NEXT: # %bb.98:
+; RV32I-NEXT: beqz s10, .LBB12_107
+; RV32I-NEXT: # %bb.106:
; RV32I-NEXT: mv s3, a3
-; RV32I-NEXT: .LBB12_99:
-; RV32I-NEXT: bltu s11, t3, .LBB12_101
-; RV32I-NEXT: # %bb.100:
+; RV32I-NEXT: .LBB12_107:
+; RV32I-NEXT: bltu s11, t3, .LBB12_109
+; RV32I-NEXT: # %bb.108:
; RV32I-NEXT: srl a3, t5, s11
-; RV32I-NEXT: bnez s11, .LBB12_102
-; RV32I-NEXT: j .LBB12_103
-; RV32I-NEXT: .LBB12_101:
+; RV32I-NEXT: bnez s11, .LBB12_110
+; RV32I-NEXT: j .LBB12_111
+; RV32I-NEXT: .LBB12_109:
; RV32I-NEXT: srl a3, t1, ra
; RV32I-NEXT: neg s10, s11
; RV32I-NEXT: sll s10, t5, s10
; RV32I-NEXT: or a3, a3, s10
-; RV32I-NEXT: beqz s11, .LBB12_103
-; RV32I-NEXT: .LBB12_102:
+; RV32I-NEXT: beqz s11, .LBB12_111
+; RV32I-NEXT: .LBB12_110:
; RV32I-NEXT: mv t1, a3
-; RV32I-NEXT: .LBB12_103:
-; RV32I-NEXT: bltu s11, t3, .LBB12_105
-; RV32I-NEXT: # %bb.104:
+; RV32I-NEXT: .LBB12_111:
+; RV32I-NEXT: bltu s11, t3, .LBB12_113
+; RV32I-NEXT: # %bb.112:
; RV32I-NEXT: li t3, 0
-; RV32I-NEXT: bltu ra, t6, .LBB12_106
-; RV32I-NEXT: j .LBB12_107
-; RV32I-NEXT: .LBB12_105:
+; RV32I-NEXT: bltu ra, t6, .LBB12_114
+; RV32I-NEXT: j .LBB12_115
+; RV32I-NEXT: .LBB12_113:
; RV32I-NEXT: srl t3, t5, ra
-; RV32I-NEXT: bgeu ra, t6, .LBB12_107
-; RV32I-NEXT: .LBB12_106:
+; RV32I-NEXT: bgeu ra, t6, .LBB12_115
+; RV32I-NEXT: .LBB12_114:
; RV32I-NEXT: or t1, a5, s9
; RV32I-NEXT: or t3, s7, s3
-; RV32I-NEXT: .LBB12_107:
+; RV32I-NEXT: .LBB12_115:
; RV32I-NEXT: li a5, 128
-; RV32I-NEXT: bnez ra, .LBB12_114
-; RV32I-NEXT: # %bb.108:
-; RV32I-NEXT: bgeu ra, t6, .LBB12_115
-; RV32I-NEXT: .LBB12_109:
-; RV32I-NEXT: bltu a4, a5, .LBB12_116
-; RV32I-NEXT: .LBB12_110:
+; RV32I-NEXT: bnez ra, .LBB12_122
+; RV32I-NEXT: # %bb.116:
+; RV32I-NEXT: bgeu ra, t6, .LBB12_123
+; RV32I-NEXT: .LBB12_117:
+; RV32I-NEXT: bltu a4, a5, .LBB12_124
+; RV32I-NEXT: .LBB12_118:
; RV32I-NEXT: lw ra, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: bnez a4, .LBB12_117
-; RV32I-NEXT: .LBB12_111:
-; RV32I-NEXT: bltu a4, a5, .LBB12_113
-; RV32I-NEXT: .LBB12_112:
+; RV32I-NEXT: bnez a4, .LBB12_125
+; RV32I-NEXT: # %bb.119:
+; RV32I-NEXT: bltu a4, a5, .LBB12_121
+; RV32I-NEXT: .LBB12_120:
; RV32I-NEXT: li s4, 0
; RV32I-NEXT: li a7, 0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: li ra, 0
-; RV32I-NEXT: .LBB12_113:
+; RV32I-NEXT: .LBB12_121:
; RV32I-NEXT: srli a4, s0, 16
; RV32I-NEXT: lui t1, 16
; RV32I-NEXT: srli t0, s0, 24
@@ -3563,15 +3556,15 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 96
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB12_114:
+; RV32I-NEXT: .LBB12_122:
; RV32I-NEXT: mv t0, t1
; RV32I-NEXT: mv a6, t3
-; RV32I-NEXT: bltu ra, t6, .LBB12_109
-; RV32I-NEXT: .LBB12_115:
+; RV32I-NEXT: bltu ra, t6, .LBB12_117
+; RV32I-NEXT: .LBB12_123:
; RV32I-NEXT: li s1, 0
; RV32I-NEXT: li s2, 0
-; RV32I-NEXT: bgeu a4, a5, .LBB12_110
-; RV32I-NEXT: .LBB12_116:
+; RV32I-NEXT: bgeu a4, a5, .LBB12_118
+; RV32I-NEXT: .LBB12_124:
; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a6, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or t0, a3, a6
@@ -3581,15 +3574,14 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: or s1, a3, s5
; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s2, a3, s6
-; RV32I-NEXT: lw ra, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: beqz a4, .LBB12_111
-; RV32I-NEXT: .LBB12_117:
+; RV32I-NEXT: j .LBB12_118
+; RV32I-NEXT: .LBB12_125:
; RV32I-NEXT: mv s0, t0
; RV32I-NEXT: mv a0, a6
; RV32I-NEXT: mv t4, s1
; RV32I-NEXT: mv t2, s2
-; RV32I-NEXT: bgeu a4, a5, .LBB12_112
-; RV32I-NEXT: j .LBB12_113
+; RV32I-NEXT: bgeu a4, a5, .LBB12_120
+; RV32I-NEXT: j .LBB12_121
%src = load i256, ptr %src.ptr, align 1
%byteOff = load i256, ptr %byteOff.ptr, align 1
%bitOff = shl i256 %byteOff, 3
@@ -4134,278 +4126,271 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: # %bb.38:
; RV32I-NEXT: li s8, 0
; RV32I-NEXT: srl a3, a0, a4
-; RV32I-NEXT: mv a5, s0
-; RV32I-NEXT: bnez a4, .LBB13_40
-; RV32I-NEXT: j .LBB13_41
+; RV32I-NEXT: j .LBB13_40
; RV32I-NEXT: .LBB13_39:
; RV32I-NEXT: srl s8, t2, a4
; RV32I-NEXT: srl a3, s0, a4
; RV32I-NEXT: sll a5, a0, s6
; RV32I-NEXT: or a3, a3, a5
-; RV32I-NEXT: mv a5, s0
-; RV32I-NEXT: beqz a4, .LBB13_41
; RV32I-NEXT: .LBB13_40:
+; RV32I-NEXT: mv a5, s0
+; RV32I-NEXT: beqz a4, .LBB13_42
+; RV32I-NEXT: # %bb.41:
; RV32I-NEXT: mv a5, a3
-; RV32I-NEXT: .LBB13_41:
-; RV32I-NEXT: bltu a4, t3, .LBB13_44
-; RV32I-NEXT: # %bb.42:
+; RV32I-NEXT: .LBB13_42:
+; RV32I-NEXT: bltu a4, t3, .LBB13_45
+; RV32I-NEXT: # %bb.43:
; RV32I-NEXT: li s1, 0
-; RV32I-NEXT: bgeu s7, t3, .LBB13_45
-; RV32I-NEXT: .LBB13_43:
+; RV32I-NEXT: bgeu s7, t3, .LBB13_46
+; RV32I-NEXT: .LBB13_44:
; RV32I-NEXT: sll s3, t4, s6
; RV32I-NEXT: srl a3, t4, s10
; RV32I-NEXT: or a3, a3, ra
-; RV32I-NEXT: mv s10, t2
-; RV32I-NEXT: bnez s7, .LBB13_46
; RV32I-NEXT: j .LBB13_47
-; RV32I-NEXT: .LBB13_44:
-; RV32I-NEXT: srl s1, a0, a4
-; RV32I-NEXT: bltu s7, t3, .LBB13_43
; RV32I-NEXT: .LBB13_45:
+; RV32I-NEXT: srl s1, a0, a4
+; RV32I-NEXT: bltu s7, t3, .LBB13_44
+; RV32I-NEXT: .LBB13_46:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t4, s7
-; RV32I-NEXT: mv s10, t2
-; RV32I-NEXT: beqz s7, .LBB13_47
-; RV32I-NEXT: .LBB13_46:
-; RV32I-NEXT: mv s10, a3
; RV32I-NEXT: .LBB13_47:
-; RV32I-NEXT: bltu s9, t3, .LBB13_49
+; RV32I-NEXT: mv s10, t2
+; RV32I-NEXT: beqz s7, .LBB13_49
; RV32I-NEXT: # %bb.48:
-; RV32I-NEXT: srl a3, t2, s9
-; RV32I-NEXT: mv s2, t4
-; RV32I-NEXT: bnez s9, .LBB13_50
-; RV32I-NEXT: j .LBB13_51
+; RV32I-NEXT: mv s10, a3
; RV32I-NEXT: .LBB13_49:
+; RV32I-NEXT: bltu s9, t3, .LBB13_51
+; RV32I-NEXT: # %bb.50:
+; RV32I-NEXT: srl a3, t2, s9
+; RV32I-NEXT: j .LBB13_52
+; RV32I-NEXT: .LBB13_51:
; RV32I-NEXT: sll a3, t2, s11
; RV32I-NEXT: or a3, s2, a3
+; RV32I-NEXT: .LBB13_52:
; RV32I-NEXT: mv s2, t4
-; RV32I-NEXT: beqz s9, .LBB13_51
-; RV32I-NEXT: .LBB13_50:
+; RV32I-NEXT: beqz s9, .LBB13_54
+; RV32I-NEXT: # %bb.53:
; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: .LBB13_51:
-; RV32I-NEXT: bltu s9, t3, .LBB13_53
-; RV32I-NEXT: # %bb.52:
+; RV32I-NEXT: .LBB13_54:
+; RV32I-NEXT: bltu s9, t3, .LBB13_56
+; RV32I-NEXT: # %bb.55:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: bltu a4, t6, .LBB13_54
-; RV32I-NEXT: j .LBB13_55
-; RV32I-NEXT: .LBB13_53:
+; RV32I-NEXT: bltu a4, t6, .LBB13_57
+; RV32I-NEXT: j .LBB13_58
+; RV32I-NEXT: .LBB13_56:
; RV32I-NEXT: srl s7, t2, a4
-; RV32I-NEXT: bgeu a4, t6, .LBB13_55
-; RV32I-NEXT: .LBB13_54:
+; RV32I-NEXT: bgeu a4, t6, .LBB13_58
+; RV32I-NEXT: .LBB13_57:
; RV32I-NEXT: or s2, a5, s3
; RV32I-NEXT: or s7, s1, s10
-; RV32I-NEXT: .LBB13_55:
+; RV32I-NEXT: .LBB13_58:
; RV32I-NEXT: li a3, 128
; RV32I-NEXT: mv a5, s0
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: beqz a4, .LBB13_57
-; RV32I-NEXT: # %bb.56:
+; RV32I-NEXT: beqz a4, .LBB13_60
+; RV32I-NEXT: # %bb.59:
; RV32I-NEXT: mv a5, s2
; RV32I-NEXT: mv s1, s7
-; RV32I-NEXT: .LBB13_57:
+; RV32I-NEXT: .LBB13_60:
; RV32I-NEXT: sw a5, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sub s2, a3, a4
-; RV32I-NEXT: bltu a4, t6, .LBB13_59
-; RV32I-NEXT: # %bb.58:
+; RV32I-NEXT: bltu a4, t6, .LBB13_62
+; RV32I-NEXT: # %bb.61:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: li s8, 0
-; RV32I-NEXT: .LBB13_59:
+; RV32I-NEXT: .LBB13_62:
; RV32I-NEXT: neg s3, s2
; RV32I-NEXT: srl a5, t0, s3
; RV32I-NEXT: sw s8, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s2, t3, .LBB13_61
-; RV32I-NEXT: # %bb.60:
+; RV32I-NEXT: bltu s2, t3, .LBB13_64
+; RV32I-NEXT: # %bb.63:
; RV32I-NEXT: li s10, 0
; RV32I-NEXT: sll a3, t0, s2
-; RV32I-NEXT: j .LBB13_62
-; RV32I-NEXT: .LBB13_61:
+; RV32I-NEXT: j .LBB13_65
+; RV32I-NEXT: .LBB13_64:
; RV32I-NEXT: sll s10, t0, s6
; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a5, a3
-; RV32I-NEXT: .LBB13_62:
+; RV32I-NEXT: .LBB13_65:
; RV32I-NEXT: sub s1, t6, s2
; RV32I-NEXT: mv s8, a6
-; RV32I-NEXT: beqz s2, .LBB13_64
-; RV32I-NEXT: # %bb.63:
+; RV32I-NEXT: beqz s2, .LBB13_67
+; RV32I-NEXT: # %bb.66:
; RV32I-NEXT: mv s8, a3
-; RV32I-NEXT: .LBB13_64:
-; RV32I-NEXT: bltu s1, t3, .LBB13_66
-; RV32I-NEXT: # %bb.65:
+; RV32I-NEXT: .LBB13_67:
+; RV32I-NEXT: bltu s1, t3, .LBB13_69
+; RV32I-NEXT: # %bb.68:
; RV32I-NEXT: srl a3, a6, s1
-; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: bnez s1, .LBB13_67
-; RV32I-NEXT: j .LBB13_68
-; RV32I-NEXT: .LBB13_66:
+; RV32I-NEXT: j .LBB13_70
+; RV32I-NEXT: .LBB13_69:
; RV32I-NEXT: neg a3, s1
; RV32I-NEXT: sll a3, a6, a3
; RV32I-NEXT: or a3, a5, a3
+; RV32I-NEXT: .LBB13_70:
; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: beqz s1, .LBB13_68
-; RV32I-NEXT: .LBB13_67:
+; RV32I-NEXT: beqz s1, .LBB13_72
+; RV32I-NEXT: # %bb.71:
; RV32I-NEXT: mv a5, a3
-; RV32I-NEXT: .LBB13_68:
-; RV32I-NEXT: bltu s1, t3, .LBB13_71
-; RV32I-NEXT: # %bb.69:
+; RV32I-NEXT: .LBB13_72:
+; RV32I-NEXT: bltu s1, t3, .LBB13_76
+; RV32I-NEXT: # %bb.73:
; RV32I-NEXT: li s1, 0
+; RV32I-NEXT: .LBB13_74:
; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bgeu s2, t3, .LBB13_72
-; RV32I-NEXT: .LBB13_70:
+; RV32I-NEXT: bgeu s2, t3, .LBB13_77
+; RV32I-NEXT: # %bb.75:
; RV32I-NEXT: sll s6, t1, s6
; RV32I-NEXT: srl a3, t1, s3
; RV32I-NEXT: lw s3, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: j .LBB13_73
-; RV32I-NEXT: .LBB13_71:
+; RV32I-NEXT: j .LBB13_78
+; RV32I-NEXT: .LBB13_76:
; RV32I-NEXT: srl s1, a6, s3
-; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s2, t3, .LBB13_70
-; RV32I-NEXT: .LBB13_72:
+; RV32I-NEXT: j .LBB13_74
+; RV32I-NEXT: .LBB13_77:
; RV32I-NEXT: li s6, 0
; RV32I-NEXT: sll a3, t1, s2
-; RV32I-NEXT: .LBB13_73:
+; RV32I-NEXT: .LBB13_78:
; RV32I-NEXT: addi s9, s2, -64
; RV32I-NEXT: mv s5, t5
-; RV32I-NEXT: beqz s2, .LBB13_75
-; RV32I-NEXT: # %bb.74:
+; RV32I-NEXT: beqz s2, .LBB13_80
+; RV32I-NEXT: # %bb.79:
; RV32I-NEXT: mv s5, a3
-; RV32I-NEXT: .LBB13_75:
-; RV32I-NEXT: bltu s9, t3, .LBB13_77
-; RV32I-NEXT: # %bb.76:
+; RV32I-NEXT: .LBB13_80:
+; RV32I-NEXT: bltu s9, t3, .LBB13_82
+; RV32I-NEXT: # %bb.81:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t0, s9
-; RV32I-NEXT: mv s7, a6
-; RV32I-NEXT: bnez s9, .LBB13_78
-; RV32I-NEXT: j .LBB13_79
-; RV32I-NEXT: .LBB13_77:
+; RV32I-NEXT: j .LBB13_83
+; RV32I-NEXT: .LBB13_82:
; RV32I-NEXT: sll s3, t0, s2
; RV32I-NEXT: neg a3, s9
; RV32I-NEXT: srl a3, t0, a3
; RV32I-NEXT: sll s7, a6, s2
; RV32I-NEXT: or a3, a3, s7
+; RV32I-NEXT: .LBB13_83:
; RV32I-NEXT: mv s7, a6
-; RV32I-NEXT: beqz s9, .LBB13_79
-; RV32I-NEXT: .LBB13_78:
+; RV32I-NEXT: beqz s9, .LBB13_85
+; RV32I-NEXT: # %bb.84:
; RV32I-NEXT: mv s7, a3
-; RV32I-NEXT: .LBB13_79:
-; RV32I-NEXT: bltu s2, t6, .LBB13_81
-; RV32I-NEXT: # %bb.80:
+; RV32I-NEXT: .LBB13_85:
+; RV32I-NEXT: bltu s2, t6, .LBB13_87
+; RV32I-NEXT: # %bb.86:
; RV32I-NEXT: sw zero, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: li s8, 0
-; RV32I-NEXT: j .LBB13_82
-; RV32I-NEXT: .LBB13_81:
+; RV32I-NEXT: j .LBB13_88
+; RV32I-NEXT: .LBB13_87:
; RV32I-NEXT: sw s10, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: or s3, a5, s6
; RV32I-NEXT: or s7, s1, s5
-; RV32I-NEXT: .LBB13_82:
+; RV32I-NEXT: .LBB13_88:
; RV32I-NEXT: addi ra, a4, -128
; RV32I-NEXT: mv s5, t1
; RV32I-NEXT: mv s6, t5
-; RV32I-NEXT: beqz s2, .LBB13_84
-; RV32I-NEXT: # %bb.83:
+; RV32I-NEXT: beqz s2, .LBB13_90
+; RV32I-NEXT: # %bb.89:
; RV32I-NEXT: mv s5, s3
; RV32I-NEXT: mv s6, s7
-; RV32I-NEXT: .LBB13_84:
+; RV32I-NEXT: .LBB13_90:
; RV32I-NEXT: neg s9, ra
; RV32I-NEXT: sll s3, t5, s9
-; RV32I-NEXT: bltu ra, t3, .LBB13_86
-; RV32I-NEXT: # %bb.85:
+; RV32I-NEXT: bltu ra, t3, .LBB13_92
+; RV32I-NEXT: # %bb.91:
; RV32I-NEXT: srl a3, t5, ra
-; RV32I-NEXT: mv s1, t1
-; RV32I-NEXT: bnez ra, .LBB13_87
-; RV32I-NEXT: j .LBB13_88
-; RV32I-NEXT: .LBB13_86:
+; RV32I-NEXT: j .LBB13_93
+; RV32I-NEXT: .LBB13_92:
; RV32I-NEXT: lw a3, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
+; RV32I-NEXT: .LBB13_93:
; RV32I-NEXT: mv s1, t1
-; RV32I-NEXT: beqz ra, .LBB13_88
-; RV32I-NEXT: .LBB13_87:
+; RV32I-NEXT: beqz ra, .LBB13_95
+; RV32I-NEXT: # %bb.94:
; RV32I-NEXT: mv s1, a3
-; RV32I-NEXT: .LBB13_88:
-; RV32I-NEXT: bltu ra, t3, .LBB13_90
-; RV32I-NEXT: # %bb.89:
+; RV32I-NEXT: .LBB13_95:
+; RV32I-NEXT: bltu ra, t3, .LBB13_97
+; RV32I-NEXT: # %bb.96:
; RV32I-NEXT: li s2, 0
; RV32I-NEXT: srl a3, a6, ra
-; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: bnez ra, .LBB13_91
-; RV32I-NEXT: j .LBB13_92
-; RV32I-NEXT: .LBB13_90:
+; RV32I-NEXT: j .LBB13_98
+; RV32I-NEXT: .LBB13_97:
; RV32I-NEXT: srl s2, t5, a4
; RV32I-NEXT: sll a3, a6, s9
; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a5, a3
+; RV32I-NEXT: .LBB13_98:
; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: beqz ra, .LBB13_92
-; RV32I-NEXT: .LBB13_91:
+; RV32I-NEXT: beqz ra, .LBB13_100
+; RV32I-NEXT: # %bb.99:
; RV32I-NEXT: mv a5, a3
-; RV32I-NEXT: .LBB13_92:
+; RV32I-NEXT: .LBB13_100:
; RV32I-NEXT: sub s10, t6, ra
-; RV32I-NEXT: bltu ra, t3, .LBB13_95
-; RV32I-NEXT: # %bb.93:
+; RV32I-NEXT: bltu ra, t3, .LBB13_103
+; RV32I-NEXT: # %bb.101:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: bgeu s10, t3, .LBB13_96
-; RV32I-NEXT: .LBB13_94:
+; RV32I-NEXT: bgeu s10, t3, .LBB13_104
+; RV32I-NEXT: .LBB13_102:
; RV32I-NEXT: sll s9, t1, s9
; RV32I-NEXT: neg a3, s10
; RV32I-NEXT: srl a3, t1, a3
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: j .LBB13_97
-; RV32I-NEXT: .LBB13_95:
+; RV32I-NEXT: j .LBB13_105
+; RV32I-NEXT: .LBB13_103:
; RV32I-NEXT: srl s7, a6, a4
-; RV32I-NEXT: bltu s10, t3, .LBB13_94
-; RV32I-NEXT: .LBB13_96:
+; RV32I-NEXT: bltu s10, t3, .LBB13_102
+; RV32I-NEXT: .LBB13_104:
; RV32I-NEXT: li s9, 0
; RV32I-NEXT: sll a3, t1, s10
-; RV32I-NEXT: .LBB13_97:
+; RV32I-NEXT: .LBB13_105:
; RV32I-NEXT: addi s11, ra, -64
; RV32I-NEXT: mv s3, t5
-; RV32I-NEXT: beqz s10, .LBB13_99
-; RV32I-NEXT: # %bb.98:
+; RV32I-NEXT: beqz s10, .LBB13_107
+; RV32I-NEXT: # %bb.106:
; RV32I-NEXT: mv s3, a3
-; RV32I-NEXT: .LBB13_99:
-; RV32I-NEXT: bltu s11, t3, .LBB13_101
-; RV32I-NEXT: # %bb.100:
+; RV32I-NEXT: .LBB13_107:
+; RV32I-NEXT: bltu s11, t3, .LBB13_109
+; RV32I-NEXT: # %bb.108:
; RV32I-NEXT: srl a3, t5, s11
-; RV32I-NEXT: bnez s11, .LBB13_102
-; RV32I-NEXT: j .LBB13_103
-; RV32I-NEXT: .LBB13_101:
+; RV32I-NEXT: bnez s11, .LBB13_110
+; RV32I-NEXT: j .LBB13_111
+; RV32I-NEXT: .LBB13_109:
; RV32I-NEXT: srl a3, t1, ra
; RV32I-NEXT: neg s10, s11
; RV32I-NEXT: sll s10, t5, s10
; RV32I-NEXT: or a3, a3, s10
-; RV32I-NEXT: beqz s11, .LBB13_103
-; RV32I-NEXT: .LBB13_102:
+; RV32I-NEXT: beqz s11, .LBB13_111
+; RV32I-NEXT: .LBB13_110:
; RV32I-NEXT: mv t1, a3
-; RV32I-NEXT: .LBB13_103:
-; RV32I-NEXT: bltu s11, t3, .LBB13_105
-; RV32I-NEXT: # %bb.104:
+; RV32I-NEXT: .LBB13_111:
+; RV32I-NEXT: bltu s11, t3, .LBB13_113
+; RV32I-NEXT: # %bb.112:
; RV32I-NEXT: li t3, 0
-; RV32I-NEXT: bltu ra, t6, .LBB13_106
-; RV32I-NEXT: j .LBB13_107
-; RV32I-NEXT: .LBB13_105:
+; RV32I-NEXT: bltu ra, t6, .LBB13_114
+; RV32I-NEXT: j .LBB13_115
+; RV32I-NEXT: .LBB13_113:
; RV32I-NEXT: srl t3, t5, ra
-; RV32I-NEXT: bgeu ra, t6, .LBB13_107
-; RV32I-NEXT: .LBB13_106:
+; RV32I-NEXT: bgeu ra, t6, .LBB13_115
+; RV32I-NEXT: .LBB13_114:
; RV32I-NEXT: or t1, a5, s9
; RV32I-NEXT: or t3, s7, s3
-; RV32I-NEXT: .LBB13_107:
+; RV32I-NEXT: .LBB13_115:
; RV32I-NEXT: li a5, 128
-; RV32I-NEXT: bnez ra, .LBB13_114
-; RV32I-NEXT: # %bb.108:
-; RV32I-NEXT: bgeu ra, t6, .LBB13_115
-; RV32I-NEXT: .LBB13_109:
-; RV32I-NEXT: bltu a4, a5, .LBB13_116
-; RV32I-NEXT: .LBB13_110:
+; RV32I-NEXT: bnez ra, .LBB13_122
+; RV32I-NEXT: # %bb.116:
+; RV32I-NEXT: bgeu ra, t6, .LBB13_123
+; RV32I-NEXT: .LBB13_117:
+; RV32I-NEXT: bltu a4, a5, .LBB13_124
+; RV32I-NEXT: .LBB13_118:
; RV32I-NEXT: lw ra, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: bnez a4, .LBB13_117
-; RV32I-NEXT: .LBB13_111:
-; RV32I-NEXT: bltu a4, a5, .LBB13_113
-; RV32I-NEXT: .LBB13_112:
+; RV32I-NEXT: bnez a4, .LBB13_125
+; RV32I-NEXT: # %bb.119:
+; RV32I-NEXT: bltu a4, a5, .LBB13_121
+; RV32I-NEXT: .LBB13_120:
; RV32I-NEXT: li s4, 0
; RV32I-NEXT: li a7, 0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: li ra, 0
-; RV32I-NEXT: .LBB13_113:
+; RV32I-NEXT: .LBB13_121:
; RV32I-NEXT: srli a4, s0, 16
; RV32I-NEXT: lui t1, 16
; RV32I-NEXT: srli t0, s0, 24
@@ -4487,15 +4472,15 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 96
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB13_114:
+; RV32I-NEXT: .LBB13_122:
; RV32I-NEXT: mv t0, t1
; RV32I-NEXT: mv a6, t3
-; RV32I-NEXT: bltu ra, t6, .LBB13_109
-; RV32I-NEXT: .LBB13_115:
+; RV32I-NEXT: bltu ra, t6, .LBB13_117
+; RV32I-NEXT: .LBB13_123:
; RV32I-NEXT: li s1, 0
; RV32I-NEXT: li s2, 0
-; RV32I-NEXT: bgeu a4, a5, .LBB13_110
-; RV32I-NEXT: .LBB13_116:
+; RV32I-NEXT: bgeu a4, a5, .LBB13_118
+; RV32I-NEXT: .LBB13_124:
; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a6, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or t0, a3, a6
@@ -4505,15 +4490,14 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: or s1, a3, s5
; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s2, a3, s6
-; RV32I-NEXT: lw ra, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: beqz a4, .LBB13_111
-; RV32I-NEXT: .LBB13_117:
+; RV32I-NEXT: j .LBB13_118
+; RV32I-NEXT: .LBB13_125:
; RV32I-NEXT: mv s0, t0
; RV32I-NEXT: mv a0, a6
; RV32I-NEXT: mv t4, s1
; RV32I-NEXT: mv t2, s2
-; RV32I-NEXT: bgeu a4, a5, .LBB13_112
-; RV32I-NEXT: j .LBB13_113
+; RV32I-NEXT: bgeu a4, a5, .LBB13_120
+; RV32I-NEXT: j .LBB13_121
%src = load i256, ptr %src.ptr, align 1
%wordOff = load i256, ptr %wordOff.ptr, align 1
%bitOff = shl i256 %wordOff, 5
@@ -5058,278 +5042,271 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV32I-NEXT: # %bb.38:
; RV32I-NEXT: li s8, 0
; RV32I-NEXT: srl a3, a0, a4
-; RV32I-NEXT: mv a5, s0
-; RV32I-NEXT: bnez a4, .LBB14_40
-; RV32I-NEXT: j .LBB14_41
+; RV32I-NEXT: j .LBB14_40
; RV32I-NEXT: .LBB14_39:
; RV32I-NEXT: srl s8, t2, a4
; RV32I-NEXT: srl a3, s0, a4
; RV32I-NEXT: sll a5, a0, s6
; RV32I-NEXT: or a3, a3, a5
-; RV32I-NEXT: mv a5, s0
-; RV32I-NEXT: beqz a4, .LBB14_41
; RV32I-NEXT: .LBB14_40:
+; RV32I-NEXT: mv a5, s0
+; RV32I-NEXT: beqz a4, .LBB14_42
+; RV32I-NEXT: # %bb.41:
; RV32I-NEXT: mv a5, a3
-; RV32I-NEXT: .LBB14_41:
-; RV32I-NEXT: bltu a4, t3, .LBB14_44
-; RV32I-NEXT: # %bb.42:
+; RV32I-NEXT: .LBB14_42:
+; RV32I-NEXT: bltu a4, t3, .LBB14_45
+; RV32I-NEXT: # %bb.43:
; RV32I-NEXT: li s1, 0
-; RV32I-NEXT: bgeu s7, t3, .LBB14_45
-; RV32I-NEXT: .LBB14_43:
+; RV32I-NEXT: bgeu s7, t3, .LBB14_46
+; RV32I-NEXT: .LBB14_44:
; RV32I-NEXT: sll s3, t4, s6
; RV32I-NEXT: srl a3, t4, s10
; RV32I-NEXT: or a3, a3, ra
-; RV32I-NEXT: mv s10, t2
-; RV32I-NEXT: bnez s7, .LBB14_46
; RV32I-NEXT: j .LBB14_47
-; RV32I-NEXT: .LBB14_44:
-; RV32I-NEXT: srl s1, a0, a4
-; RV32I-NEXT: bltu s7, t3, .LBB14_43
; RV32I-NEXT: .LBB14_45:
+; RV32I-NEXT: srl s1, a0, a4
+; RV32I-NEXT: bltu s7, t3, .LBB14_44
+; RV32I-NEXT: .LBB14_46:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t4, s7
-; RV32I-NEXT: mv s10, t2
-; RV32I-NEXT: beqz s7, .LBB14_47
-; RV32I-NEXT: .LBB14_46:
-; RV32I-NEXT: mv s10, a3
; RV32I-NEXT: .LBB14_47:
-; RV32I-NEXT: bltu s9, t3, .LBB14_49
+; RV32I-NEXT: mv s10, t2
+; RV32I-NEXT: beqz s7, .LBB14_49
; RV32I-NEXT: # %bb.48:
-; RV32I-NEXT: srl a3, t2, s9
-; RV32I-NEXT: mv s2, t4
-; RV32I-NEXT: bnez s9, .LBB14_50
-; RV32I-NEXT: j .LBB14_51
+; RV32I-NEXT: mv s10, a3
; RV32I-NEXT: .LBB14_49:
+; RV32I-NEXT: bltu s9, t3, .LBB14_51
+; RV32I-NEXT: # %bb.50:
+; RV32I-NEXT: srl a3, t2, s9
+; RV32I-NEXT: j .LBB14_52
+; RV32I-NEXT: .LBB14_51:
; RV32I-NEXT: sll a3, t2, s11
; RV32I-NEXT: or a3, s2, a3
+; RV32I-NEXT: .LBB14_52:
; RV32I-NEXT: mv s2, t4
-; RV32I-NEXT: beqz s9, .LBB14_51
-; RV32I-NEXT: .LBB14_50:
+; RV32I-NEXT: beqz s9, .LBB14_54
+; RV32I-NEXT: # %bb.53:
; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: .LBB14_51:
-; RV32I-NEXT: bltu s9, t3, .LBB14_53
-; RV32I-NEXT: # %bb.52:
+; RV32I-NEXT: .LBB14_54:
+; RV32I-NEXT: bltu s9, t3, .LBB14_56
+; RV32I-NEXT: # %bb.55:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: bltu a4, t6, .LBB14_54
-; RV32I-NEXT: j .LBB14_55
-; RV32I-NEXT: .LBB14_53:
+; RV32I-NEXT: bltu a4, t6, .LBB14_57
+; RV32I-NEXT: j .LBB14_58
+; RV32I-NEXT: .LBB14_56:
; RV32I-NEXT: srl s7, t2, a4
-; RV32I-NEXT: bgeu a4, t6, .LBB14_55
-; RV32I-NEXT: .LBB14_54:
+; RV32I-NEXT: bgeu a4, t6, .LBB14_58
+; RV32I-NEXT: .LBB14_57:
; RV32I-NEXT: or s2, a5, s3
; RV32I-NEXT: or s7, s1, s10
-; RV32I-NEXT: .LBB14_55:
+; RV32I-NEXT: .LBB14_58:
; RV32I-NEXT: li a3, 128
; RV32I-NEXT: mv a5, s0
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: beqz a4, .LBB14_57
-; RV32I-NEXT: # %bb.56:
+; RV32I-NEXT: beqz a4, .LBB14_60
+; RV32I-NEXT: # %bb.59:
; RV32I-NEXT: mv a5, s2
; RV32I-NEXT: mv s1, s7
-; RV32I-NEXT: .LBB14_57:
+; RV32I-NEXT: .LBB14_60:
; RV32I-NEXT: sw a5, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sub s2, a3, a4
-; RV32I-NEXT: bltu a4, t6, .LBB14_59
-; RV32I-NEXT: # %bb.58:
+; RV32I-NEXT: bltu a4, t6, .LBB14_62
+; RV32I-NEXT: # %bb.61:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: li s8, 0
-; RV32I-NEXT: .LBB14_59:
+; RV32I-NEXT: .LBB14_62:
; RV32I-NEXT: neg s3, s2
; RV32I-NEXT: srl a5, t0, s3
; RV32I-NEXT: sw s8, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s2, t3, .LBB14_61
-; RV32I-NEXT: # %bb.60:
+; RV32I-NEXT: bltu s2, t3, .LBB14_64
+; RV32I-NEXT: # %bb.63:
; RV32I-NEXT: li s10, 0
; RV32I-NEXT: sll a3, t0, s2
-; RV32I-NEXT: j .LBB14_62
-; RV32I-NEXT: .LBB14_61:
+; RV32I-NEXT: j .LBB14_65
+; RV32I-NEXT: .LBB14_64:
; RV32I-NEXT: sll s10, t0, s6
; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a5, a3
-; RV32I-NEXT: .LBB14_62:
+; RV32I-NEXT: .LBB14_65:
; RV32I-NEXT: sub s1, t6, s2
; RV32I-NEXT: mv s8, a6
-; RV32I-NEXT: beqz s2, .LBB14_64
-; RV32I-NEXT: # %bb.63:
+; RV32I-NEXT: beqz s2, .LBB14_67
+; RV32I-NEXT: # %bb.66:
; RV32I-NEXT: mv s8, a3
-; RV32I-NEXT: .LBB14_64:
-; RV32I-NEXT: bltu s1, t3, .LBB14_66
-; RV32I-NEXT: # %bb.65:
+; RV32I-NEXT: .LBB14_67:
+; RV32I-NEXT: bltu s1, t3, .LBB14_69
+; RV32I-NEXT: # %bb.68:
; RV32I-NEXT: srl a3, a6, s1
-; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: bnez s1, .LBB14_67
-; RV32I-NEXT: j .LBB14_68
-; RV32I-NEXT: .LBB14_66:
+; RV32I-NEXT: j .LBB14_70
+; RV32I-NEXT: .LBB14_69:
; RV32I-NEXT: neg a3, s1
; RV32I-NEXT: sll a3, a6, a3
; RV32I-NEXT: or a3, a5, a3
+; RV32I-NEXT: .LBB14_70:
; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: beqz s1, .LBB14_68
-; RV32I-NEXT: .LBB14_67:
+; RV32I-NEXT: beqz s1, .LBB14_72
+; RV32I-NEXT: # %bb.71:
; RV32I-NEXT: mv a5, a3
-; RV32I-NEXT: .LBB14_68:
-; RV32I-NEXT: bltu s1, t3, .LBB14_71
-; RV32I-NEXT: # %bb.69:
+; RV32I-NEXT: .LBB14_72:
+; RV32I-NEXT: bltu s1, t3, .LBB14_76
+; RV32I-NEXT: # %bb.73:
; RV32I-NEXT: li s1, 0
+; RV32I-NEXT: .LBB14_74:
; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bgeu s2, t3, .LBB14_72
-; RV32I-NEXT: .LBB14_70:
+; RV32I-NEXT: bgeu s2, t3, .LBB14_77
+; RV32I-NEXT: # %bb.75:
; RV32I-NEXT: sll s6, t1, s6
; RV32I-NEXT: srl a3, t1, s3
; RV32I-NEXT: lw s3, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: j .LBB14_73
-; RV32I-NEXT: .LBB14_71:
+; RV32I-NEXT: j .LBB14_78
+; RV32I-NEXT: .LBB14_76:
; RV32I-NEXT: srl s1, a6, s3
-; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s2, t3, .LBB14_70
-; RV32I-NEXT: .LBB14_72:
+; RV32I-NEXT: j .LBB14_74
+; RV32I-NEXT: .LBB14_77:
; RV32I-NEXT: li s6, 0
; RV32I-NEXT: sll a3, t1, s2
-; RV32I-NEXT: .LBB14_73:
+; RV32I-NEXT: .LBB14_78:
; RV32I-NEXT: addi s9, s2, -64
; RV32I-NEXT: mv s5, t5
-; RV32I-NEXT: beqz s2, .LBB14_75
-; RV32I-NEXT: # %bb.74:
+; RV32I-NEXT: beqz s2, .LBB14_80
+; RV32I-NEXT: # %bb.79:
; RV32I-NEXT: mv s5, a3
-; RV32I-NEXT: .LBB14_75:
-; RV32I-NEXT: bltu s9, t3, .LBB14_77
-; RV32I-NEXT: # %bb.76:
+; RV32I-NEXT: .LBB14_80:
+; RV32I-NEXT: bltu s9, t3, .LBB14_82
+; RV32I-NEXT: # %bb.81:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t0, s9
-; RV32I-NEXT: mv s7, a6
-; RV32I-NEXT: bnez s9, .LBB14_78
-; RV32I-NEXT: j .LBB14_79
-; RV32I-NEXT: .LBB14_77:
+; RV32I-NEXT: j .LBB14_83
+; RV32I-NEXT: .LBB14_82:
; RV32I-NEXT: sll s3, t0, s2
; RV32I-NEXT: neg a3, s9
; RV32I-NEXT: srl a3, t0, a3
; RV32I-NEXT: sll s7, a6, s2
; RV32I-NEXT: or a3, a3, s7
+; RV32I-NEXT: .LBB14_83:
; RV32I-NEXT: mv s7, a6
-; RV32I-NEXT: beqz s9, .LBB14_79
-; RV32I-NEXT: .LBB14_78:
+; RV32I-NEXT: beqz s9, .LBB14_85
+; RV32I-NEXT: # %bb.84:
; RV32I-NEXT: mv s7, a3
-; RV32I-NEXT: .LBB14_79:
-; RV32I-NEXT: bltu s2, t6, .LBB14_81
-; RV32I-NEXT: # %bb.80:
+; RV32I-NEXT: .LBB14_85:
+; RV32I-NEXT: bltu s2, t6, .LBB14_87
+; RV32I-NEXT: # %bb.86:
; RV32I-NEXT: sw zero, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: li s8, 0
-; RV32I-NEXT: j .LBB14_82
-; RV32I-NEXT: .LBB14_81:
+; RV32I-NEXT: j .LBB14_88
+; RV32I-NEXT: .LBB14_87:
; RV32I-NEXT: sw s10, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: or s3, a5, s6
; RV32I-NEXT: or s7, s1, s5
-; RV32I-NEXT: .LBB14_82:
+; RV32I-NEXT: .LBB14_88:
; RV32I-NEXT: addi ra, a4, -128
; RV32I-NEXT: mv s5, t1
; RV32I-NEXT: mv s6, t5
-; RV32I-NEXT: beqz s2, .LBB14_84
-; RV32I-NEXT: # %bb.83:
+; RV32I-NEXT: beqz s2, .LBB14_90
+; RV32I-NEXT: # %bb.89:
; RV32I-NEXT: mv s5, s3
; RV32I-NEXT: mv s6, s7
-; RV32I-NEXT: .LBB14_84:
+; RV32I-NEXT: .LBB14_90:
; RV32I-NEXT: neg s9, ra
; RV32I-NEXT: sll s3, t5, s9
-; RV32I-NEXT: bltu ra, t3, .LBB14_86
-; RV32I-NEXT: # %bb.85:
+; RV32I-NEXT: bltu ra, t3, .LBB14_92
+; RV32I-NEXT: # %bb.91:
; RV32I-NEXT: srl a3, t5, ra
-; RV32I-NEXT: mv s1, t1
-; RV32I-NEXT: bnez ra, .LBB14_87
-; RV32I-NEXT: j .LBB14_88
-; RV32I-NEXT: .LBB14_86:
+; RV32I-NEXT: j .LBB14_93
+; RV32I-NEXT: .LBB14_92:
; RV32I-NEXT: lw a3, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
+; RV32I-NEXT: .LBB14_93:
; RV32I-NEXT: mv s1, t1
-; RV32I-NEXT: beqz ra, .LBB14_88
-; RV32I-NEXT: .LBB14_87:
+; RV32I-NEXT: beqz ra, .LBB14_95
+; RV32I-NEXT: # %bb.94:
; RV32I-NEXT: mv s1, a3
-; RV32I-NEXT: .LBB14_88:
-; RV32I-NEXT: bltu ra, t3, .LBB14_90
-; RV32I-NEXT: # %bb.89:
+; RV32I-NEXT: .LBB14_95:
+; RV32I-NEXT: bltu ra, t3, .LBB14_97
+; RV32I-NEXT: # %bb.96:
; RV32I-NEXT: li s2, 0
; RV32I-NEXT: srl a3, a6, ra
-; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: bnez ra, .LBB14_91
-; RV32I-NEXT: j .LBB14_92
-; RV32I-NEXT: .LBB14_90:
+; RV32I-NEXT: j .LBB14_98
+; RV32I-NEXT: .LBB14_97:
; RV32I-NEXT: srl s2, t5, a4
; RV32I-NEXT: sll a3, a6, s9
; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a5, a3
+; RV32I-NEXT: .LBB14_98:
; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: beqz ra, .LBB14_92
-; RV32I-NEXT: .LBB14_91:
+; RV32I-NEXT: beqz ra, .LBB14_100
+; RV32I-NEXT: # %bb.99:
; RV32I-NEXT: mv a5, a3
-; RV32I-NEXT: .LBB14_92:
+; RV32I-NEXT: .LBB14_100:
; RV32I-NEXT: sub s10, t6, ra
-; RV32I-NEXT: bltu ra, t3, .LBB14_95
-; RV32I-NEXT: # %bb.93:
+; RV32I-NEXT: bltu ra, t3, .LBB14_103
+; RV32I-NEXT: # %bb.101:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: bgeu s10, t3, .LBB14_96
-; RV32I-NEXT: .LBB14_94:
+; RV32I-NEXT: bgeu s10, t3, .LBB14_104
+; RV32I-NEXT: .LBB14_102:
; RV32I-NEXT: sll s9, t1, s9
; RV32I-NEXT: neg a3, s10
; RV32I-NEXT: srl a3, t1, a3
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: j .LBB14_97
-; RV32I-NEXT: .LBB14_95:
+; RV32I-NEXT: j .LBB14_105
+; RV32I-NEXT: .LBB14_103:
; RV32I-NEXT: srl s7, a6, a4
-; RV32I-NEXT: bltu s10, t3, .LBB14_94
-; RV32I-NEXT: .LBB14_96:
+; RV32I-NEXT: bltu s10, t3, .LBB14_102
+; RV32I-NEXT: .LBB14_104:
; RV32I-NEXT: li s9, 0
; RV32I-NEXT: sll a3, t1, s10
-; RV32I-NEXT: .LBB14_97:
+; RV32I-NEXT: .LBB14_105:
; RV32I-NEXT: addi s11, ra, -64
; RV32I-NEXT: mv s3, t5
-; RV32I-NEXT: beqz s10, .LBB14_99
-; RV32I-NEXT: # %bb.98:
+; RV32I-NEXT: beqz s10, .LBB14_107
+; RV32I-NEXT: # %bb.106:
; RV32I-NEXT: mv s3, a3
-; RV32I-NEXT: .LBB14_99:
-; RV32I-NEXT: bltu s11, t3, .LBB14_101
-; RV32I-NEXT: # %bb.100:
+; RV32I-NEXT: .LBB14_107:
+; RV32I-NEXT: bltu s11, t3, .LBB14_109
+; RV32I-NEXT: # %bb.108:
; RV32I-NEXT: srl a3, t5, s11
-; RV32I-NEXT: bnez s11, .LBB14_102
-; RV32I-NEXT: j .LBB14_103
-; RV32I-NEXT: .LBB14_101:
+; RV32I-NEXT: bnez s11, .LBB14_110
+; RV32I-NEXT: j .LBB14_111
+; RV32I-NEXT: .LBB14_109:
; RV32I-NEXT: srl a3, t1, ra
; RV32I-NEXT: neg s10, s11
; RV32I-NEXT: sll s10, t5, s10
; RV32I-NEXT: or a3, a3, s10
-; RV32I-NEXT: beqz s11, .LBB14_103
-; RV32I-NEXT: .LBB14_102:
+; RV32I-NEXT: beqz s11, .LBB14_111
+; RV32I-NEXT: .LBB14_110:
; RV32I-NEXT: mv t1, a3
-; RV32I-NEXT: .LBB14_103:
-; RV32I-NEXT: bltu s11, t3, .LBB14_105
-; RV32I-NEXT: # %bb.104:
+; RV32I-NEXT: .LBB14_111:
+; RV32I-NEXT: bltu s11, t3, .LBB14_113
+; RV32I-NEXT: # %bb.112:
; RV32I-NEXT: li t3, 0
-; RV32I-NEXT: bltu ra, t6, .LBB14_106
-; RV32I-NEXT: j .LBB14_107
-; RV32I-NEXT: .LBB14_105:
+; RV32I-NEXT: bltu ra, t6, .LBB14_114
+; RV32I-NEXT: j .LBB14_115
+; RV32I-NEXT: .LBB14_113:
; RV32I-NEXT: srl t3, t5, ra
-; RV32I-NEXT: bgeu ra, t6, .LBB14_107
-; RV32I-NEXT: .LBB14_106:
+; RV32I-NEXT: bgeu ra, t6, .LBB14_115
+; RV32I-NEXT: .LBB14_114:
; RV32I-NEXT: or t1, a5, s9
; RV32I-NEXT: or t3, s7, s3
-; RV32I-NEXT: .LBB14_107:
+; RV32I-NEXT: .LBB14_115:
; RV32I-NEXT: li a5, 128
-; RV32I-NEXT: bnez ra, .LBB14_114
-; RV32I-NEXT: # %bb.108:
-; RV32I-NEXT: bgeu ra, t6, .LBB14_115
-; RV32I-NEXT: .LBB14_109:
-; RV32I-NEXT: bltu a4, a5, .LBB14_116
-; RV32I-NEXT: .LBB14_110:
+; RV32I-NEXT: bnez ra, .LBB14_122
+; RV32I-NEXT: # %bb.116:
+; RV32I-NEXT: bgeu ra, t6, .LBB14_123
+; RV32I-NEXT: .LBB14_117:
+; RV32I-NEXT: bltu a4, a5, .LBB14_124
+; RV32I-NEXT: .LBB14_118:
; RV32I-NEXT: lw ra, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: bnez a4, .LBB14_117
-; RV32I-NEXT: .LBB14_111:
-; RV32I-NEXT: bltu a4, a5, .LBB14_113
-; RV32I-NEXT: .LBB14_112:
+; RV32I-NEXT: bnez a4, .LBB14_125
+; RV32I-NEXT: # %bb.119:
+; RV32I-NEXT: bltu a4, a5, .LBB14_121
+; RV32I-NEXT: .LBB14_120:
; RV32I-NEXT: li s4, 0
; RV32I-NEXT: li a7, 0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: li ra, 0
-; RV32I-NEXT: .LBB14_113:
+; RV32I-NEXT: .LBB14_121:
; RV32I-NEXT: srli a4, s0, 16
; RV32I-NEXT: lui t1, 16
; RV32I-NEXT: srli t0, s0, 24
@@ -5411,15 +5388,15 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 96
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB14_114:
+; RV32I-NEXT: .LBB14_122:
; RV32I-NEXT: mv t0, t1
; RV32I-NEXT: mv a6, t3
-; RV32I-NEXT: bltu ra, t6, .LBB14_109
-; RV32I-NEXT: .LBB14_115:
+; RV32I-NEXT: bltu ra, t6, .LBB14_117
+; RV32I-NEXT: .LBB14_123:
; RV32I-NEXT: li s1, 0
; RV32I-NEXT: li s2, 0
-; RV32I-NEXT: bgeu a4, a5, .LBB14_110
-; RV32I-NEXT: .LBB14_116:
+; RV32I-NEXT: bgeu a4, a5, .LBB14_118
+; RV32I-NEXT: .LBB14_124:
; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a6, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or t0, a3, a6
@@ -5429,15 +5406,14 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV32I-NEXT: or s1, a3, s5
; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s2, a3, s6
-; RV32I-NEXT: lw ra, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: beqz a4, .LBB14_111
-; RV32I-NEXT: .LBB14_117:
+; RV32I-NEXT: j .LBB14_118
+; RV32I-NEXT: .LBB14_125:
; RV32I-NEXT: mv s0, t0
; RV32I-NEXT: mv a0, a6
; RV32I-NEXT: mv t4, s1
; RV32I-NEXT: mv t2, s2
-; RV32I-NEXT: bgeu a4, a5, .LBB14_112
-; RV32I-NEXT: j .LBB14_113
+; RV32I-NEXT: bgeu a4, a5, .LBB14_120
+; RV32I-NEXT: j .LBB14_121
%src = load i256, ptr %src.ptr, align 1
%dwordOff = load i256, ptr %dwordOff.ptr, align 1
%bitOff = shl i256 %dwordOff, 6
@@ -5884,115 +5860,112 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: # %bb.19:
; RV32I-NEXT: li s2, 0
; RV32I-NEXT: sll a1, t3, s7
-; RV32I-NEXT: mv s4, a5
-; RV32I-NEXT: bnez s7, .LBB15_21
-; RV32I-NEXT: j .LBB15_22
+; RV32I-NEXT: j .LBB15_21
; RV32I-NEXT: .LBB15_20:
; RV32I-NEXT: sll s2, t3, a4
; RV32I-NEXT: srl a1, t3, a1
; RV32I-NEXT: or a1, a1, s5
-; RV32I-NEXT: mv s4, a5
-; RV32I-NEXT: beqz s7, .LBB15_22
; RV32I-NEXT: .LBB15_21:
+; RV32I-NEXT: mv s4, a5
+; RV32I-NEXT: beqz s7, .LBB15_23
+; RV32I-NEXT: # %bb.22:
; RV32I-NEXT: mv s4, a1
-; RV32I-NEXT: .LBB15_22:
+; RV32I-NEXT: .LBB15_23:
; RV32I-NEXT: li a1, 128
-; RV32I-NEXT: bltu a4, s9, .LBB15_24
-; RV32I-NEXT: # %bb.23:
+; RV32I-NEXT: bltu a4, s9, .LBB15_25
+; RV32I-NEXT: # %bb.24:
; RV32I-NEXT: sw zero, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: li a3, 0
-; RV32I-NEXT: j .LBB15_25
-; RV32I-NEXT: .LBB15_24:
+; RV32I-NEXT: j .LBB15_26
+; RV32I-NEXT: .LBB15_25:
; RV32I-NEXT: sw s8, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: or s2, a6, s1
; RV32I-NEXT: or s4, a7, s3
-; RV32I-NEXT: .LBB15_25:
+; RV32I-NEXT: .LBB15_26:
; RV32I-NEXT: sub ra, a1, a4
; RV32I-NEXT: mv a7, t1
; RV32I-NEXT: mv a6, t2
-; RV32I-NEXT: beqz a4, .LBB15_27
-; RV32I-NEXT: # %bb.26:
+; RV32I-NEXT: beqz a4, .LBB15_28
+; RV32I-NEXT: # %bb.27:
; RV32I-NEXT: mv a7, s2
; RV32I-NEXT: mv a6, s4
-; RV32I-NEXT: .LBB15_27:
+; RV32I-NEXT: .LBB15_28:
; RV32I-NEXT: neg s1, ra
; RV32I-NEXT: sll s2, t2, s1
-; RV32I-NEXT: bltu ra, t4, .LBB15_29
-; RV32I-NEXT: # %bb.28:
+; RV32I-NEXT: bltu ra, t4, .LBB15_30
+; RV32I-NEXT: # %bb.29:
; RV32I-NEXT: srl a1, t2, ra
-; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bnez ra, .LBB15_30
; RV32I-NEXT: j .LBB15_31
-; RV32I-NEXT: .LBB15_29:
-; RV32I-NEXT: or a1, s0, s2
-; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: beqz ra, .LBB15_31
; RV32I-NEXT: .LBB15_30:
-; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: or a1, s0, s2
; RV32I-NEXT: .LBB15_31:
-; RV32I-NEXT: bltu ra, t4, .LBB15_33
+; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: beqz ra, .LBB15_33
; RV32I-NEXT: # %bb.32:
+; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .LBB15_33:
+; RV32I-NEXT: bltu ra, t4, .LBB15_35
+; RV32I-NEXT: # %bb.34:
; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT: srl a1, a5, ra
-; RV32I-NEXT: mv t5, t3
-; RV32I-NEXT: bnez ra, .LBB15_34
-; RV32I-NEXT: j .LBB15_35
-; RV32I-NEXT: .LBB15_33:
+; RV32I-NEXT: j .LBB15_36
+; RV32I-NEXT: .LBB15_35:
; RV32I-NEXT: srl a1, t2, s10
; RV32I-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT: sll a1, a5, s1
; RV32I-NEXT: or a1, t5, a1
+; RV32I-NEXT: .LBB15_36:
; RV32I-NEXT: mv t5, t3
-; RV32I-NEXT: beqz ra, .LBB15_35
-; RV32I-NEXT: .LBB15_34:
+; RV32I-NEXT: beqz ra, .LBB15_38
+; RV32I-NEXT: # %bb.37:
; RV32I-NEXT: mv t5, a1
-; RV32I-NEXT: .LBB15_35:
+; RV32I-NEXT: .LBB15_38:
; RV32I-NEXT: sub s3, s9, ra
-; RV32I-NEXT: bltu ra, t4, .LBB15_38
-; RV32I-NEXT: # %bb.36:
+; RV32I-NEXT: bltu ra, t4, .LBB15_41
+; RV32I-NEXT: # %bb.39:
; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bgeu s3, t4, .LBB15_39
-; RV32I-NEXT: .LBB15_37:
+; RV32I-NEXT: bgeu s3, t4, .LBB15_42
+; RV32I-NEXT: .LBB15_40:
; RV32I-NEXT: sll s1, t1, s1
; RV32I-NEXT: neg a1, s3
; RV32I-NEXT: srl a1, t1, a1
; RV32I-NEXT: or a1, a1, s2
-; RV32I-NEXT: j .LBB15_40
-; RV32I-NEXT: .LBB15_38:
+; RV32I-NEXT: j .LBB15_43
+; RV32I-NEXT: .LBB15_41:
; RV32I-NEXT: srl a1, a5, s10
; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s3, t4, .LBB15_37
-; RV32I-NEXT: .LBB15_39:
+; RV32I-NEXT: bltu s3, t4, .LBB15_40
+; RV32I-NEXT: .LBB15_42:
; RV32I-NEXT: li s1, 0
; RV32I-NEXT: sll a1, t1, s3
-; RV32I-NEXT: .LBB15_40:
+; RV32I-NEXT: .LBB15_43:
; RV32I-NEXT: addi s4, ra, -64
; RV32I-NEXT: mv s2, t2
-; RV32I-NEXT: beqz s3, .LBB15_42
-; RV32I-NEXT: # %bb.41:
+; RV32I-NEXT: beqz s3, .LBB15_45
+; RV32I-NEXT: # %bb.44:
; RV32I-NEXT: mv s2, a1
-; RV32I-NEXT: .LBB15_42:
+; RV32I-NEXT: .LBB15_45:
; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s5, a7
-; RV32I-NEXT: bltu s4, t4, .LBB15_44
-; RV32I-NEXT: # %bb.43:
+; RV32I-NEXT: bltu s4, t4, .LBB15_47
+; RV32I-NEXT: # %bb.46:
; RV32I-NEXT: srl t0, t2, s4
-; RV32I-NEXT: j .LBB15_45
-; RV32I-NEXT: .LBB15_44:
+; RV32I-NEXT: j .LBB15_48
+; RV32I-NEXT: .LBB15_47:
; RV32I-NEXT: srl a1, t1, ra
; RV32I-NEXT: neg t0, s4
; RV32I-NEXT: sll t0, t2, t0
; RV32I-NEXT: or t0, a1, t0
-; RV32I-NEXT: .LBB15_45:
+; RV32I-NEXT: .LBB15_48:
; RV32I-NEXT: mv s0, s10
; RV32I-NEXT: mv a7, a6
; RV32I-NEXT: lbu s8, 19(a0)
; RV32I-NEXT: lbu a1, 23(a0)
; RV32I-NEXT: mv s3, t1
-; RV32I-NEXT: beqz s4, .LBB15_47
-; RV32I-NEXT: # %bb.46:
+; RV32I-NEXT: beqz s4, .LBB15_50
+; RV32I-NEXT: # %bb.49:
; RV32I-NEXT: mv s3, t0
-; RV32I-NEXT: .LBB15_47:
+; RV32I-NEXT: .LBB15_50:
; RV32I-NEXT: mv a6, a3
; RV32I-NEXT: lbu s10, 17(a0)
; RV32I-NEXT: lbu t0, 18(a0)
@@ -6001,25 +5974,25 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli s8, s8, 8
; RV32I-NEXT: slli a1, a1, 8
; RV32I-NEXT: li a3, 64
-; RV32I-NEXT: bltu s4, t4, .LBB15_49
-; RV32I-NEXT: # %bb.48:
+; RV32I-NEXT: bltu s4, t4, .LBB15_52
+; RV32I-NEXT: # %bb.51:
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: j .LBB15_50
-; RV32I-NEXT: .LBB15_49:
+; RV32I-NEXT: j .LBB15_53
+; RV32I-NEXT: .LBB15_52:
; RV32I-NEXT: srl s4, t2, ra
-; RV32I-NEXT: .LBB15_50:
+; RV32I-NEXT: .LBB15_53:
; RV32I-NEXT: or s11, s8, t0
; RV32I-NEXT: lbu t0, 16(a0)
; RV32I-NEXT: lbu s8, 20(a0)
; RV32I-NEXT: slli s10, s10, 8
; RV32I-NEXT: slli s9, s9, 8
; RV32I-NEXT: or t6, a1, t6
-; RV32I-NEXT: bgeu ra, a3, .LBB15_52
-; RV32I-NEXT: # %bb.51:
+; RV32I-NEXT: bgeu ra, a3, .LBB15_55
+; RV32I-NEXT: # %bb.54:
; RV32I-NEXT: or s3, t5, s1
; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s4, a1, s2
-; RV32I-NEXT: .LBB15_52:
+; RV32I-NEXT: .LBB15_55:
; RV32I-NEXT: or a1, s10, t0
; RV32I-NEXT: slli s11, s11, 16
; RV32I-NEXT: or t0, s9, s8
@@ -6027,58 +6000,58 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: mv t5, t3
; RV32I-NEXT: mv s1, a5
; RV32I-NEXT: mv a3, a6
-; RV32I-NEXT: beqz ra, .LBB15_54
-; RV32I-NEXT: # %bb.53:
+; RV32I-NEXT: beqz ra, .LBB15_57
+; RV32I-NEXT: # %bb.56:
; RV32I-NEXT: mv t5, s3
; RV32I-NEXT: mv s1, s4
-; RV32I-NEXT: .LBB15_54:
+; RV32I-NEXT: .LBB15_57:
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: or s2, s11, a1
; RV32I-NEXT: or s1, t6, t0
; RV32I-NEXT: li a1, 64
; RV32I-NEXT: mv a6, a7
; RV32I-NEXT: mv a7, s0
-; RV32I-NEXT: bltu ra, a1, .LBB15_56
-; RV32I-NEXT: # %bb.55:
+; RV32I-NEXT: bltu ra, a1, .LBB15_59
+; RV32I-NEXT: # %bb.58:
; RV32I-NEXT: sw zero, 40(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .LBB15_56:
+; RV32I-NEXT: .LBB15_59:
; RV32I-NEXT: srl s3, s2, a7
; RV32I-NEXT: sll ra, s1, a4
; RV32I-NEXT: mv a7, s5
; RV32I-NEXT: sw t5, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu a4, t4, .LBB15_58
-; RV32I-NEXT: # %bb.57:
+; RV32I-NEXT: bltu a4, t4, .LBB15_61
+; RV32I-NEXT: # %bb.60:
; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: sll a1, s2, a4
-; RV32I-NEXT: j .LBB15_59
-; RV32I-NEXT: .LBB15_58:
+; RV32I-NEXT: j .LBB15_62
+; RV32I-NEXT: .LBB15_61:
; RV32I-NEXT: sll a1, s2, a4
; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: or a1, s3, ra
-; RV32I-NEXT: .LBB15_59:
+; RV32I-NEXT: .LBB15_62:
; RV32I-NEXT: lbu s9, 27(a0)
; RV32I-NEXT: lbu t6, 31(a0)
; RV32I-NEXT: mv t5, s1
-; RV32I-NEXT: beqz a4, .LBB15_61
-; RV32I-NEXT: # %bb.60:
+; RV32I-NEXT: beqz a4, .LBB15_64
+; RV32I-NEXT: # %bb.63:
; RV32I-NEXT: mv t5, a1
-; RV32I-NEXT: .LBB15_61:
+; RV32I-NEXT: .LBB15_64:
; RV32I-NEXT: lbu s8, 25(a0)
; RV32I-NEXT: lbu s4, 26(a0)
; RV32I-NEXT: lbu s11, 29(a0)
; RV32I-NEXT: lbu s10, 30(a0)
; RV32I-NEXT: slli s9, s9, 8
; RV32I-NEXT: slli t6, t6, 8
-; RV32I-NEXT: bltu s6, t4, .LBB15_63
-; RV32I-NEXT: # %bb.62:
+; RV32I-NEXT: bltu s6, t4, .LBB15_66
+; RV32I-NEXT: # %bb.65:
; RV32I-NEXT: srl t0, s1, s6
-; RV32I-NEXT: j .LBB15_64
-; RV32I-NEXT: .LBB15_63:
+; RV32I-NEXT: j .LBB15_67
+; RV32I-NEXT: .LBB15_66:
; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: sll a1, s1, a1
; RV32I-NEXT: or t0, s3, a1
-; RV32I-NEXT: .LBB15_64:
+; RV32I-NEXT: .LBB15_67:
; RV32I-NEXT: slli s8, s8, 8
; RV32I-NEXT: lbu s3, 24(a0)
; RV32I-NEXT: lbu a1, 28(a0)
@@ -6086,174 +6059,170 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli s11, s11, 8
; RV32I-NEXT: or t6, t6, s10
; RV32I-NEXT: mv s9, s2
-; RV32I-NEXT: beqz s6, .LBB15_66
-; RV32I-NEXT: # %bb.65:
+; RV32I-NEXT: beqz s6, .LBB15_69
+; RV32I-NEXT: # %bb.68:
; RV32I-NEXT: mv s9, t0
-; RV32I-NEXT: .LBB15_66:
+; RV32I-NEXT: .LBB15_69:
; RV32I-NEXT: or a0, s8, s3
; RV32I-NEXT: slli t0, s4, 16
; RV32I-NEXT: or a1, s11, a1
; RV32I-NEXT: slli t6, t6, 16
-; RV32I-NEXT: bltu s6, t4, .LBB15_68
-; RV32I-NEXT: # %bb.67:
+; RV32I-NEXT: bltu s6, t4, .LBB15_71
+; RV32I-NEXT: # %bb.70:
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: j .LBB15_69
-; RV32I-NEXT: .LBB15_68:
+; RV32I-NEXT: j .LBB15_72
+; RV32I-NEXT: .LBB15_71:
; RV32I-NEXT: srl s4, s1, s0
-; RV32I-NEXT: .LBB15_69:
+; RV32I-NEXT: .LBB15_72:
; RV32I-NEXT: li s11, 64
; RV32I-NEXT: or s6, t0, a0
; RV32I-NEXT: or a0, t6, a1
-; RV32I-NEXT: bltu a4, t4, .LBB15_71
-; RV32I-NEXT: # %bb.70:
+; RV32I-NEXT: bltu a4, t4, .LBB15_74
+; RV32I-NEXT: # %bb.73:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a1, s6, a4
-; RV32I-NEXT: mv s10, a0
-; RV32I-NEXT: bnez a4, .LBB15_72
-; RV32I-NEXT: j .LBB15_73
-; RV32I-NEXT: .LBB15_71:
+; RV32I-NEXT: j .LBB15_75
+; RV32I-NEXT: .LBB15_74:
; RV32I-NEXT: sll s3, s6, a4
; RV32I-NEXT: srl a1, s6, s0
; RV32I-NEXT: sll t0, a0, a4
; RV32I-NEXT: or a1, a1, t0
+; RV32I-NEXT: .LBB15_75:
; RV32I-NEXT: mv s10, a0
-; RV32I-NEXT: beqz a4, .LBB15_73
-; RV32I-NEXT: .LBB15_72:
+; RV32I-NEXT: beqz a4, .LBB15_77
+; RV32I-NEXT: # %bb.76:
; RV32I-NEXT: mv s10, a1
-; RV32I-NEXT: .LBB15_73:
-; RV32I-NEXT: bltu s7, t4, .LBB15_75
-; RV32I-NEXT: # %bb.74:
+; RV32I-NEXT: .LBB15_77:
+; RV32I-NEXT: bltu s7, t4, .LBB15_79
+; RV32I-NEXT: # %bb.78:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: sll a1, s2, s7
-; RV32I-NEXT: mv s0, s1
-; RV32I-NEXT: bnez s7, .LBB15_76
-; RV32I-NEXT: j .LBB15_77
-; RV32I-NEXT: .LBB15_75:
+; RV32I-NEXT: j .LBB15_80
+; RV32I-NEXT: .LBB15_79:
; RV32I-NEXT: sll s5, s2, a4
; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: srl a1, s2, a1
; RV32I-NEXT: or a1, a1, ra
+; RV32I-NEXT: .LBB15_80:
; RV32I-NEXT: mv s0, s1
-; RV32I-NEXT: beqz s7, .LBB15_77
-; RV32I-NEXT: .LBB15_76:
+; RV32I-NEXT: beqz s7, .LBB15_82
+; RV32I-NEXT: # %bb.81:
; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: .LBB15_77:
-; RV32I-NEXT: bltu a4, s11, .LBB15_79
-; RV32I-NEXT: # %bb.78:
+; RV32I-NEXT: .LBB15_82:
+; RV32I-NEXT: bltu a4, s11, .LBB15_84
+; RV32I-NEXT: # %bb.83:
; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: li t5, 0
-; RV32I-NEXT: j .LBB15_80
-; RV32I-NEXT: .LBB15_79:
+; RV32I-NEXT: j .LBB15_85
+; RV32I-NEXT: .LBB15_84:
; RV32I-NEXT: or s5, s9, s3
; RV32I-NEXT: or s0, s4, s10
-; RV32I-NEXT: .LBB15_80:
+; RV32I-NEXT: .LBB15_85:
; RV32I-NEXT: addi s9, a4, -128
; RV32I-NEXT: mv s7, s6
; RV32I-NEXT: mv s8, a0
-; RV32I-NEXT: beqz a4, .LBB15_82
-; RV32I-NEXT: # %bb.81:
+; RV32I-NEXT: beqz a4, .LBB15_87
+; RV32I-NEXT: # %bb.86:
; RV32I-NEXT: mv s7, s5
; RV32I-NEXT: mv s8, s0
-; RV32I-NEXT: .LBB15_82:
+; RV32I-NEXT: .LBB15_87:
; RV32I-NEXT: neg s3, s9
; RV32I-NEXT: srl s0, t3, s3
-; RV32I-NEXT: bltu s9, t4, .LBB15_84
-; RV32I-NEXT: # %bb.83:
+; RV32I-NEXT: bltu s9, t4, .LBB15_89
+; RV32I-NEXT: # %bb.88:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: sll a1, t3, s9
-; RV32I-NEXT: j .LBB15_85
-; RV32I-NEXT: .LBB15_84:
+; RV32I-NEXT: j .LBB15_90
+; RV32I-NEXT: .LBB15_89:
; RV32I-NEXT: sll s5, t3, a4
; RV32I-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a1, s0, a1
-; RV32I-NEXT: .LBB15_85:
+; RV32I-NEXT: .LBB15_90:
; RV32I-NEXT: sub s4, s11, s9
; RV32I-NEXT: mv t6, a5
-; RV32I-NEXT: beqz s9, .LBB15_87
-; RV32I-NEXT: # %bb.86:
+; RV32I-NEXT: beqz s9, .LBB15_92
+; RV32I-NEXT: # %bb.91:
; RV32I-NEXT: mv t6, a1
-; RV32I-NEXT: .LBB15_87:
-; RV32I-NEXT: bltu s4, t4, .LBB15_89
-; RV32I-NEXT: # %bb.88:
+; RV32I-NEXT: .LBB15_92:
+; RV32I-NEXT: bltu s4, t4, .LBB15_94
+; RV32I-NEXT: # %bb.93:
; RV32I-NEXT: srl a1, a5, s4
-; RV32I-NEXT: mv s0, t3
-; RV32I-NEXT: bnez s4, .LBB15_90
-; RV32I-NEXT: j .LBB15_91
-; RV32I-NEXT: .LBB15_89:
+; RV32I-NEXT: j .LBB15_95
+; RV32I-NEXT: .LBB15_94:
; RV32I-NEXT: neg a1, s4
; RV32I-NEXT: sll a1, a5, a1
; RV32I-NEXT: or a1, s0, a1
+; RV32I-NEXT: .LBB15_95:
; RV32I-NEXT: mv s0, t3
-; RV32I-NEXT: beqz s4, .LBB15_91
-; RV32I-NEXT: .LBB15_90:
+; RV32I-NEXT: beqz s4, .LBB15_97
+; RV32I-NEXT: # %bb.96:
; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: .LBB15_91:
-; RV32I-NEXT: bltu s4, t4, .LBB15_94
-; RV32I-NEXT: # %bb.92:
+; RV32I-NEXT: .LBB15_97:
+; RV32I-NEXT: bltu s4, t4, .LBB15_101
+; RV32I-NEXT: # %bb.98:
; RV32I-NEXT: li s4, 0
+; RV32I-NEXT: .LBB15_99:
; RV32I-NEXT: li ra, 64
-; RV32I-NEXT: bgeu s9, t4, .LBB15_95
-; RV32I-NEXT: .LBB15_93:
+; RV32I-NEXT: bgeu s9, t4, .LBB15_102
+; RV32I-NEXT: # %bb.100:
; RV32I-NEXT: sll s10, t1, a4
; RV32I-NEXT: srl a1, t1, s3
; RV32I-NEXT: lw t0, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a1, a1, t0
-; RV32I-NEXT: j .LBB15_96
-; RV32I-NEXT: .LBB15_94:
+; RV32I-NEXT: j .LBB15_103
+; RV32I-NEXT: .LBB15_101:
; RV32I-NEXT: srl s4, a5, s3
-; RV32I-NEXT: li ra, 64
-; RV32I-NEXT: bltu s9, t4, .LBB15_93
-; RV32I-NEXT: .LBB15_95:
+; RV32I-NEXT: j .LBB15_99
+; RV32I-NEXT: .LBB15_102:
; RV32I-NEXT: li s10, 0
; RV32I-NEXT: sll a1, t1, s9
-; RV32I-NEXT: .LBB15_96:
+; RV32I-NEXT: .LBB15_103:
; RV32I-NEXT: addi s11, s9, -64
; RV32I-NEXT: mv s3, t2
-; RV32I-NEXT: beqz s9, .LBB15_98
-; RV32I-NEXT: # %bb.97:
+; RV32I-NEXT: beqz s9, .LBB15_105
+; RV32I-NEXT: # %bb.104:
; RV32I-NEXT: mv s3, a1
-; RV32I-NEXT: .LBB15_98:
-; RV32I-NEXT: bltu s11, t4, .LBB15_100
-; RV32I-NEXT: # %bb.99:
+; RV32I-NEXT: .LBB15_105:
+; RV32I-NEXT: bltu s11, t4, .LBB15_107
+; RV32I-NEXT: # %bb.106:
; RV32I-NEXT: li t4, 0
; RV32I-NEXT: sll a1, t3, s11
-; RV32I-NEXT: bnez s11, .LBB15_101
-; RV32I-NEXT: j .LBB15_102
-; RV32I-NEXT: .LBB15_100:
+; RV32I-NEXT: bnez s11, .LBB15_108
+; RV32I-NEXT: j .LBB15_109
+; RV32I-NEXT: .LBB15_107:
; RV32I-NEXT: sll t4, t3, s9
; RV32I-NEXT: neg a1, s11
; RV32I-NEXT: srl a1, t3, a1
; RV32I-NEXT: sll t0, a5, s9
; RV32I-NEXT: or a1, a1, t0
-; RV32I-NEXT: beqz s11, .LBB15_102
-; RV32I-NEXT: .LBB15_101:
+; RV32I-NEXT: beqz s11, .LBB15_109
+; RV32I-NEXT: .LBB15_108:
; RV32I-NEXT: mv a5, a1
-; RV32I-NEXT: .LBB15_102:
-; RV32I-NEXT: bltu s9, ra, .LBB15_104
-; RV32I-NEXT: # %bb.103:
+; RV32I-NEXT: .LBB15_109:
+; RV32I-NEXT: bltu s9, ra, .LBB15_111
+; RV32I-NEXT: # %bb.110:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: li t6, 0
-; RV32I-NEXT: li a1, 128
-; RV32I-NEXT: bnez s9, .LBB15_105
-; RV32I-NEXT: j .LBB15_106
-; RV32I-NEXT: .LBB15_104:
+; RV32I-NEXT: j .LBB15_112
+; RV32I-NEXT: .LBB15_111:
; RV32I-NEXT: or t4, s0, s10
; RV32I-NEXT: or a5, s4, s3
+; RV32I-NEXT: .LBB15_112:
; RV32I-NEXT: li a1, 128
-; RV32I-NEXT: beqz s9, .LBB15_106
-; RV32I-NEXT: .LBB15_105:
+; RV32I-NEXT: beqz s9, .LBB15_114
+; RV32I-NEXT: # %bb.113:
; RV32I-NEXT: mv t1, t4
; RV32I-NEXT: mv t2, a5
-; RV32I-NEXT: .LBB15_106:
-; RV32I-NEXT: bltu a4, a1, .LBB15_108
-; RV32I-NEXT: # %bb.107:
+; RV32I-NEXT: .LBB15_114:
+; RV32I-NEXT: bltu a4, a1, .LBB15_116
+; RV32I-NEXT: # %bb.115:
; RV32I-NEXT: li ra, 0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a7, 0
; RV32I-NEXT: li a6, 0
-; RV32I-NEXT: bnez a4, .LBB15_109
-; RV32I-NEXT: j .LBB15_110
-; RV32I-NEXT: .LBB15_108:
+; RV32I-NEXT: bnez a4, .LBB15_117
+; RV32I-NEXT: j .LBB15_118
+; RV32I-NEXT: .LBB15_116:
; RV32I-NEXT: lw a1, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s5, a1, a5
@@ -6264,13 +6233,13 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: or t2, a1, s8
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: beqz a4, .LBB15_110
-; RV32I-NEXT: .LBB15_109:
+; RV32I-NEXT: beqz a4, .LBB15_118
+; RV32I-NEXT: .LBB15_117:
; RV32I-NEXT: mv s2, s5
; RV32I-NEXT: mv s1, t6
; RV32I-NEXT: mv s6, t1
; RV32I-NEXT: mv a0, t2
-; RV32I-NEXT: .LBB15_110:
+; RV32I-NEXT: .LBB15_118:
; RV32I-NEXT: srli a4, ra, 16
; RV32I-NEXT: lui t2, 16
; RV32I-NEXT: srli t1, ra, 24
@@ -6798,115 +6767,112 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV32I-NEXT: # %bb.19:
; RV32I-NEXT: li s2, 0
; RV32I-NEXT: sll a1, t3, s7
-; RV32I-NEXT: mv s4, a5
-; RV32I-NEXT: bnez s7, .LBB16_21
-; RV32I-NEXT: j .LBB16_22
+; RV32I-NEXT: j .LBB16_21
; RV32I-NEXT: .LBB16_20:
; RV32I-NEXT: sll s2, t3, a4
; RV32I-NEXT: srl a1, t3, a1
; RV32I-NEXT: or a1, a1, s5
-; RV32I-NEXT: mv s4, a5
-; RV32I-NEXT: beqz s7, .LBB16_22
; RV32I-NEXT: .LBB16_21:
+; RV32I-NEXT: mv s4, a5
+; RV32I-NEXT: beqz s7, .LBB16_23
+; RV32I-NEXT: # %bb.22:
; RV32I-NEXT: mv s4, a1
-; RV32I-NEXT: .LBB16_22:
+; RV32I-NEXT: .LBB16_23:
; RV32I-NEXT: li a1, 128
-; RV32I-NEXT: bltu a4, s9, .LBB16_24
-; RV32I-NEXT: # %bb.23:
+; RV32I-NEXT: bltu a4, s9, .LBB16_25
+; RV32I-NEXT: # %bb.24:
; RV32I-NEXT: sw zero, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: li a3, 0
-; RV32I-NEXT: j .LBB16_25
-; RV32I-NEXT: .LBB16_24:
+; RV32I-NEXT: j .LBB16_26
+; RV32I-NEXT: .LBB16_25:
; RV32I-NEXT: sw s8, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: or s2, a6, s1
; RV32I-NEXT: or s4, a7, s3
-; RV32I-NEXT: .LBB16_25:
+; RV32I-NEXT: .LBB16_26:
; RV32I-NEXT: sub ra, a1, a4
; RV32I-NEXT: mv a7, t1
; RV32I-NEXT: mv a6, t2
-; RV32I-NEXT: beqz a4, .LBB16_27
-; RV32I-NEXT: # %bb.26:
+; RV32I-NEXT: beqz a4, .LBB16_28
+; RV32I-NEXT: # %bb.27:
; RV32I-NEXT: mv a7, s2
; RV32I-NEXT: mv a6, s4
-; RV32I-NEXT: .LBB16_27:
+; RV32I-NEXT: .LBB16_28:
; RV32I-NEXT: neg s1, ra
; RV32I-NEXT: sll s2, t2, s1
-; RV32I-NEXT: bltu ra, t4, .LBB16_29
-; RV32I-NEXT: # %bb.28:
+; RV32I-NEXT: bltu ra, t4, .LBB16_30
+; RV32I-NEXT: # %bb.29:
; RV32I-NEXT: srl a1, t2, ra
-; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bnez ra, .LBB16_30
; RV32I-NEXT: j .LBB16_31
-; RV32I-NEXT: .LBB16_29:
-; RV32I-NEXT: or a1, s0, s2
-; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: beqz ra, .LBB16_31
; RV32I-NEXT: .LBB16_30:
-; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: or a1, s0, s2
; RV32I-NEXT: .LBB16_31:
-; RV32I-NEXT: bltu ra, t4, .LBB16_33
+; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: beqz ra, .LBB16_33
; RV32I-NEXT: # %bb.32:
+; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .LBB16_33:
+; RV32I-NEXT: bltu ra, t4, .LBB16_35
+; RV32I-NEXT: # %bb.34:
; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT: srl a1, a5, ra
-; RV32I-NEXT: mv t5, t3
-; RV32I-NEXT: bnez ra, .LBB16_34
-; RV32I-NEXT: j .LBB16_35
-; RV32I-NEXT: .LBB16_33:
+; RV32I-NEXT: j .LBB16_36
+; RV32I-NEXT: .LBB16_35:
; RV32I-NEXT: srl a1, t2, s10
; RV32I-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT: sll a1, a5, s1
; RV32I-NEXT: or a1, t5, a1
+; RV32I-NEXT: .LBB16_36:
; RV32I-NEXT: mv t5, t3
-; RV32I-NEXT: beqz ra, .LBB16_35
-; RV32I-NEXT: .LBB16_34:
+; RV32I-NEXT: beqz ra, .LBB16_38
+; RV32I-NEXT: # %bb.37:
; RV32I-NEXT: mv t5, a1
-; RV32I-NEXT: .LBB16_35:
+; RV32I-NEXT: .LBB16_38:
; RV32I-NEXT: sub s3, s9, ra
-; RV32I-NEXT: bltu ra, t4, .LBB16_38
-; RV32I-NEXT: # %bb.36:
+; RV32I-NEXT: bltu ra, t4, .LBB16_41
+; RV32I-NEXT: # %bb.39:
; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bgeu s3, t4, .LBB16_39
-; RV32I-NEXT: .LBB16_37:
+; RV32I-NEXT: bgeu s3, t4, .LBB16_42
+; RV32I-NEXT: .LBB16_40:
; RV32I-NEXT: sll s1, t1, s1
; RV32I-NEXT: neg a1, s3
; RV32I-NEXT: srl a1, t1, a1
; RV32I-NEXT: or a1, a1, s2
-; RV32I-NEXT: j .LBB16_40
-; RV32I-NEXT: .LBB16_38:
+; RV32I-NEXT: j .LBB16_43
+; RV32I-NEXT: .LBB16_41:
; RV32I-NEXT: srl a1, a5, s10
; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s3, t4, .LBB16_37
-; RV32I-NEXT: .LBB16_39:
+; RV32I-NEXT: bltu s3, t4, .LBB16_40
+; RV32I-NEXT: .LBB16_42:
; RV32I-NEXT: li s1, 0
; RV32I-NEXT: sll a1, t1, s3
-; RV32I-NEXT: .LBB16_40:
+; RV32I-NEXT: .LBB16_43:
; RV32I-NEXT: addi s4, ra, -64
; RV32I-NEXT: mv s2, t2
-; RV32I-NEXT: beqz s3, .LBB16_42
-; RV32I-NEXT: # %bb.41:
+; RV32I-NEXT: beqz s3, .LBB16_45
+; RV32I-NEXT: # %bb.44:
; RV32I-NEXT: mv s2, a1
-; RV32I-NEXT: .LBB16_42:
+; RV32I-NEXT: .LBB16_45:
; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s5, a7
-; RV32I-NEXT: bltu s4, t4, .LBB16_44
-; RV32I-NEXT: # %bb.43:
+; RV32I-NEXT: bltu s4, t4, .LBB16_47
+; RV32I-NEXT: # %bb.46:
; RV32I-NEXT: srl t0, t2, s4
-; RV32I-NEXT: j .LBB16_45
-; RV32I-NEXT: .LBB16_44:
+; RV32I-NEXT: j .LBB16_48
+; RV32I-NEXT: .LBB16_47:
; RV32I-NEXT: srl a1, t1, ra
; RV32I-NEXT: neg t0, s4
; RV32I-NEXT: sll t0, t2, t0
; RV32I-NEXT: or t0, a1, t0
-; RV32I-NEXT: .LBB16_45:
+; RV32I-NEXT: .LBB16_48:
; RV32I-NEXT: mv s0, s10
; RV32I-NEXT: mv a7, a6
; RV32I-NEXT: lbu s8, 19(a0)
; RV32I-NEXT: lbu a1, 23(a0)
; RV32I-NEXT: mv s3, t1
-; RV32I-NEXT: beqz s4, .LBB16_47
-; RV32I-NEXT: # %bb.46:
+; RV32I-NEXT: beqz s4, .LBB16_50
+; RV32I-NEXT: # %bb.49:
; RV32I-NEXT: mv s3, t0
-; RV32I-NEXT: .LBB16_47:
+; RV32I-NEXT: .LBB16_50:
; RV32I-NEXT: mv a6, a3
; RV32I-NEXT: lbu s10, 17(a0)
; RV32I-NEXT: lbu t0, 18(a0)
@@ -6915,25 +6881,25 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV32I-NEXT: slli s8, s8, 8
; RV32I-NEXT: slli a1, a1, 8
; RV32I-NEXT: li a3, 64
-; RV32I-NEXT: bltu s4, t4, .LBB16_49
-; RV32I-NEXT: # %bb.48:
+; RV32I-NEXT: bltu s4, t4, .LBB16_52
+; RV32I-NEXT: # %bb.51:
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: j .LBB16_50
-; RV32I-NEXT: .LBB16_49:
+; RV32I-NEXT: j .LBB16_53
+; RV32I-NEXT: .LBB16_52:
; RV32I-NEXT: srl s4, t2, ra
-; RV32I-NEXT: .LBB16_50:
+; RV32I-NEXT: .LBB16_53:
; RV32I-NEXT: or s11, s8, t0
; RV32I-NEXT: lbu t0, 16(a0)
; RV32I-NEXT: lbu s8, 20(a0)
; RV32I-NEXT: slli s10, s10, 8
; RV32I-NEXT: slli s9, s9, 8
; RV32I-NEXT: or t6, a1, t6
-; RV32I-NEXT: bgeu ra, a3, .LBB16_52
-; RV32I-NEXT: # %bb.51:
+; RV32I-NEXT: bgeu ra, a3, .LBB16_55
+; RV32I-NEXT: # %bb.54:
; RV32I-NEXT: or s3, t5, s1
; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s4, a1, s2
-; RV32I-NEXT: .LBB16_52:
+; RV32I-NEXT: .LBB16_55:
; RV32I-NEXT: or a1, s10, t0
; RV32I-NEXT: slli s11, s11, 16
; RV32I-NEXT: or t0, s9, s8
@@ -6941,58 +6907,58 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV32I-NEXT: mv t5, t3
; RV32I-NEXT: mv s1, a5
; RV32I-NEXT: mv a3, a6
-; RV32I-NEXT: beqz ra, .LBB16_54
-; RV32I-NEXT: # %bb.53:
+; RV32I-NEXT: beqz ra, .LBB16_57
+; RV32I-NEXT: # %bb.56:
; RV32I-NEXT: mv t5, s3
; RV32I-NEXT: mv s1, s4
-; RV32I-NEXT: .LBB16_54:
+; RV32I-NEXT: .LBB16_57:
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: or s2, s11, a1
; RV32I-NEXT: or s1, t6, t0
; RV32I-NEXT: li a1, 64
; RV32I-NEXT: mv a6, a7
; RV32I-NEXT: mv a7, s0
-; RV32I-NEXT: bltu ra, a1, .LBB16_56
-; RV32I-NEXT: # %bb.55:
+; RV32I-NEXT: bltu ra, a1, .LBB16_59
+; RV32I-NEXT: # %bb.58:
; RV32I-NEXT: sw zero, 40(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .LBB16_56:
+; RV32I-NEXT: .LBB16_59:
; RV32I-NEXT: srl s3, s2, a7
; RV32I-NEXT: sll ra, s1, a4
; RV32I-NEXT: mv a7, s5
; RV32I-NEXT: sw t5, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu a4, t4, .LBB16_58
-; RV32I-NEXT: # %bb.57:
+; RV32I-NEXT: bltu a4, t4, .LBB16_61
+; RV32I-NEXT: # %bb.60:
; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: sll a1, s2, a4
-; RV32I-NEXT: j .LBB16_59
-; RV32I-NEXT: .LBB16_58:
+; RV32I-NEXT: j .LBB16_62
+; RV32I-NEXT: .LBB16_61:
; RV32I-NEXT: sll a1, s2, a4
; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: or a1, s3, ra
-; RV32I-NEXT: .LBB16_59:
+; RV32I-NEXT: .LBB16_62:
; RV32I-NEXT: lbu s9, 27(a0)
; RV32I-NEXT: lbu t6, 31(a0)
; RV32I-NEXT: mv t5, s1
-; RV32I-NEXT: beqz a4, .LBB16_61
-; RV32I-NEXT: # %bb.60:
+; RV32I-NEXT: beqz a4, .LBB16_64
+; RV32I-NEXT: # %bb.63:
; RV32I-NEXT: mv t5, a1
-; RV32I-NEXT: .LBB16_61:
+; RV32I-NEXT: .LBB16_64:
; RV32I-NEXT: lbu s8, 25(a0)
; RV32I-NEXT: lbu s4, 26(a0)
; RV32I-NEXT: lbu s11, 29(a0)
; RV32I-NEXT: lbu s10, 30(a0)
; RV32I-NEXT: slli s9, s9, 8
; RV32I-NEXT: slli t6, t6, 8
-; RV32I-NEXT: bltu s6, t4, .LBB16_63
-; RV32I-NEXT: # %bb.62:
+; RV32I-NEXT: bltu s6, t4, .LBB16_66
+; RV32I-NEXT: # %bb.65:
; RV32I-NEXT: srl t0, s1, s6
-; RV32I-NEXT: j .LBB16_64
-; RV32I-NEXT: .LBB16_63:
+; RV32I-NEXT: j .LBB16_67
+; RV32I-NEXT: .LBB16_66:
; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: sll a1, s1, a1
; RV32I-NEXT: or t0, s3, a1
-; RV32I-NEXT: .LBB16_64:
+; RV32I-NEXT: .LBB16_67:
; RV32I-NEXT: slli s8, s8, 8
; RV32I-NEXT: lbu s3, 24(a0)
; RV32I-NEXT: lbu a1, 28(a0)
@@ -7000,174 +6966,170 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV32I-NEXT: slli s11, s11, 8
; RV32I-NEXT: or t6, t6, s10
; RV32I-NEXT: mv s9, s2
-; RV32I-NEXT: beqz s6, .LBB16_66
-; RV32I-NEXT: # %bb.65:
+; RV32I-NEXT: beqz s6, .LBB16_69
+; RV32I-NEXT: # %bb.68:
; RV32I-NEXT: mv s9, t0
-; RV32I-NEXT: .LBB16_66:
+; RV32I-NEXT: .LBB16_69:
; RV32I-NEXT: or a0, s8, s3
; RV32I-NEXT: slli t0, s4, 16
; RV32I-NEXT: or a1, s11, a1
; RV32I-NEXT: slli t6, t6, 16
-; RV32I-NEXT: bltu s6, t4, .LBB16_68
-; RV32I-NEXT: # %bb.67:
+; RV32I-NEXT: bltu s6, t4, .LBB16_71
+; RV32I-NEXT: # %bb.70:
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: j .LBB16_69
-; RV32I-NEXT: .LBB16_68:
+; RV32I-NEXT: j .LBB16_72
+; RV32I-NEXT: .LBB16_71:
; RV32I-NEXT: srl s4, s1, s0
-; RV32I-NEXT: .LBB16_69:
+; RV32I-NEXT: .LBB16_72:
; RV32I-NEXT: li s11, 64
; RV32I-NEXT: or s6, t0, a0
; RV32I-NEXT: or a0, t6, a1
-; RV32I-NEXT: bltu a4, t4, .LBB16_71
-; RV32I-NEXT: # %bb.70:
+; RV32I-NEXT: bltu a4, t4, .LBB16_74
+; RV32I-NEXT: # %bb.73:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a1, s6, a4
-; RV32I-NEXT: mv s10, a0
-; RV32I-NEXT: bnez a4, .LBB16_72
-; RV32I-NEXT: j .LBB16_73
-; RV32I-NEXT: .LBB16_71:
+; RV32I-NEXT: j .LBB16_75
+; RV32I-NEXT: .LBB16_74:
; RV32I-NEXT: sll s3, s6, a4
; RV32I-NEXT: srl a1, s6, s0
; RV32I-NEXT: sll t0, a0, a4
; RV32I-NEXT: or a1, a1, t0
+; RV32I-NEXT: .LBB16_75:
; RV32I-NEXT: mv s10, a0
-; RV32I-NEXT: beqz a4, .LBB16_73
-; RV32I-NEXT: .LBB16_72:
+; RV32I-NEXT: beqz a4, .LBB16_77
+; RV32I-NEXT: # %bb.76:
; RV32I-NEXT: mv s10, a1
-; RV32I-NEXT: .LBB16_73:
-; RV32I-NEXT: bltu s7, t4, .LBB16_75
-; RV32I-NEXT: # %bb.74:
+; RV32I-NEXT: .LBB16_77:
+; RV32I-NEXT: bltu s7, t4, .LBB16_79
+; RV32I-NEXT: # %bb.78:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: sll a1, s2, s7
-; RV32I-NEXT: mv s0, s1
-; RV32I-NEXT: bnez s7, .LBB16_76
-; RV32I-NEXT: j .LBB16_77
-; RV32I-NEXT: .LBB16_75:
+; RV32I-NEXT: j .LBB16_80
+; RV32I-NEXT: .LBB16_79:
; RV32I-NEXT: sll s5, s2, a4
; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: srl a1, s2, a1
; RV32I-NEXT: or a1, a1, ra
+; RV32I-NEXT: .LBB16_80:
; RV32I-NEXT: mv s0, s1
-; RV32I-NEXT: beqz s7, .LBB16_77
-; RV32I-NEXT: .LBB16_76:
+; RV32I-NEXT: beqz s7, .LBB16_82
+; RV32I-NEXT: # %bb.81:
; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: .LBB16_77:
-; RV32I-NEXT: bltu a4, s11, .LBB16_79
-; RV32I-NEXT: # %bb.78:
+; RV32I-NEXT: .LBB16_82:
+; RV32I-NEXT: bltu a4, s11, .LBB16_84
+; RV32I-NEXT: # %bb.83:
; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: li t5, 0
-; RV32I-NEXT: j .LBB16_80
-; RV32I-NEXT: .LBB16_79:
+; RV32I-NEXT: j .LBB16_85
+; RV32I-NEXT: .LBB16_84:
; RV32I-NEXT: or s5, s9, s3
; RV32I-NEXT: or s0, s4, s10
-; RV32I-NEXT: .LBB16_80:
+; RV32I-NEXT: .LBB16_85:
; RV32I-NEXT: addi s9, a4, -128
; RV32I-NEXT: mv s7, s6
; RV32I-NEXT: mv s8, a0
-; RV32I-NEXT: beqz a4, .LBB16_82
-; RV32I-NEXT: # %bb.81:
+; RV32I-NEXT: beqz a4, .LBB16_87
+; RV32I-NEXT: # %bb.86:
; RV32I-NEXT: mv s7, s5
; RV32I-NEXT: mv s8, s0
-; RV32I-NEXT: .LBB16_82:
+; RV32I-NEXT: .LBB16_87:
; RV32I-NEXT: neg s3, s9
; RV32I-NEXT: srl s0, t3, s3
-; RV32I-NEXT: bltu s9, t4, .LBB16_84
-; RV32I-NEXT: # %bb.83:
+; RV32I-NEXT: bltu s9, t4, .LBB16_89
+; RV32I-NEXT: # %bb.88:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: sll a1, t3, s9
-; RV32I-NEXT: j .LBB16_85
-; RV32I-NEXT: .LBB16_84:
+; RV32I-NEXT: j .LBB16_90
+; RV32I-NEXT: .LBB16_89:
; RV32I-NEXT: sll s5, t3, a4
; RV32I-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a1, s0, a1
-; RV32I-NEXT: .LBB16_85:
+; RV32I-NEXT: .LBB16_90:
; RV32I-NEXT: sub s4, s11, s9
; RV32I-NEXT: mv t6, a5
-; RV32I-NEXT: beqz s9, .LBB16_87
-; RV32I-NEXT: # %bb.86:
+; RV32I-NEXT: beqz s9, .LBB16_92
+; RV32I-NEXT: # %bb.91:
; RV32I-NEXT: mv t6, a1
-; RV32I-NEXT: .LBB16_87:
-; RV32I-NEXT: bltu s4, t4, .LBB16_89
-; RV32I-NEXT: # %bb.88:
+; RV32I-NEXT: .LBB16_92:
+; RV32I-NEXT: bltu s4, t4, .LBB16_94
+; RV32I-NEXT: # %bb.93:
; RV32I-NEXT: srl a1, a5, s4
-; RV32I-NEXT: mv s0, t3
-; RV32I-NEXT: bnez s4, .LBB16_90
-; RV32I-NEXT: j .LBB16_91
-; RV32I-NEXT: .LBB16_89:
+; RV32I-NEXT: j .LBB16_95
+; RV32I-NEXT: .LBB16_94:
; RV32I-NEXT: neg a1, s4
; RV32I-NEXT: sll a1, a5, a1
; RV32I-NEXT: or a1, s0, a1
+; RV32I-NEXT: .LBB16_95:
; RV32I-NEXT: mv s0, t3
-; RV32I-NEXT: beqz s4, .LBB16_91
-; RV32I-NEXT: .LBB16_90:
+; RV32I-NEXT: beqz s4, .LBB16_97
+; RV32I-NEXT: # %bb.96:
; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: .LBB16_91:
-; RV32I-NEXT: bltu s4, t4, .LBB16_94
-; RV32I-NEXT: # %bb.92:
+; RV32I-NEXT: .LBB16_97:
+; RV32I-NEXT: bltu s4, t4, .LBB16_101
+; RV32I-NEXT: # %bb.98:
; RV32I-NEXT: li s4, 0
+; RV32I-NEXT: .LBB16_99:
; RV32I-NEXT: li ra, 64
-; RV32I-NEXT: bgeu s9, t4, .LBB16_95
-; RV32I-NEXT: .LBB16_93:
+; RV32I-NEXT: bgeu s9, t4, .LBB16_102
+; RV32I-NEXT: # %bb.100:
; RV32I-NEXT: sll s10, t1, a4
; RV32I-NEXT: srl a1, t1, s3
; RV32I-NEXT: lw t0, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a1, a1, t0
-; RV32I-NEXT: j .LBB16_96
-; RV32I-NEXT: .LBB16_94:
+; RV32I-NEXT: j .LBB16_103
+; RV32I-NEXT: .LBB16_101:
; RV32I-NEXT: srl s4, a5, s3
-; RV32I-NEXT: li ra, 64
-; RV32I-NEXT: bltu s9, t4, .LBB16_93
-; RV32I-NEXT: .LBB16_95:
+; RV32I-NEXT: j .LBB16_99
+; RV32I-NEXT: .LBB16_102:
; RV32I-NEXT: li s10, 0
; RV32I-NEXT: sll a1, t1, s9
-; RV32I-NEXT: .LBB16_96:
+; RV32I-NEXT: .LBB16_103:
; RV32I-NEXT: addi s11, s9, -64
; RV32I-NEXT: mv s3, t2
-; RV32I-NEXT: beqz s9, .LBB16_98
-; RV32I-NEXT: # %bb.97:
+; RV32I-NEXT: beqz s9, .LBB16_105
+; RV32I-NEXT: # %bb.104:
; RV32I-NEXT: mv s3, a1
-; RV32I-NEXT: .LBB16_98:
-; RV32I-NEXT: bltu s11, t4, .LBB16_100
-; RV32I-NEXT: # %bb.99:
+; RV32I-NEXT: .LBB16_105:
+; RV32I-NEXT: bltu s11, t4, .LBB16_107
+; RV32I-NEXT: # %bb.106:
; RV32I-NEXT: li t4, 0
; RV32I-NEXT: sll a1, t3, s11
-; RV32I-NEXT: bnez s11, .LBB16_101
-; RV32I-NEXT: j .LBB16_102
-; RV32I-NEXT: .LBB16_100:
+; RV32I-NEXT: bnez s11, .LBB16_108
+; RV32I-NEXT: j .LBB16_109
+; RV32I-NEXT: .LBB16_107:
; RV32I-NEXT: sll t4, t3, s9
; RV32I-NEXT: neg a1, s11
; RV32I-NEXT: srl a1, t3, a1
; RV32I-NEXT: sll t0, a5, s9
; RV32I-NEXT: or a1, a1, t0
-; RV32I-NEXT: beqz s11, .LBB16_102
-; RV32I-NEXT: .LBB16_101:
+; RV32I-NEXT: beqz s11, .LBB16_109
+; RV32I-NEXT: .LBB16_108:
; RV32I-NEXT: mv a5, a1
-; RV32I-NEXT: .LBB16_102:
-; RV32I-NEXT: bltu s9, ra, .LBB16_104
-; RV32I-NEXT: # %bb.103:
+; RV32I-NEXT: .LBB16_109:
+; RV32I-NEXT: bltu s9, ra, .LBB16_111
+; RV32I-NEXT: # %bb.110:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: li t6, 0
-; RV32I-NEXT: li a1, 128
-; RV32I-NEXT: bnez s9, .LBB16_105
-; RV32I-NEXT: j .LBB16_106
-; RV32I-NEXT: .LBB16_104:
+; RV32I-NEXT: j .LBB16_112
+; RV32I-NEXT: .LBB16_111:
; RV32I-NEXT: or t4, s0, s10
; RV32I-NEXT: or a5, s4, s3
+; RV32I-NEXT: .LBB16_112:
; RV32I-NEXT: li a1, 128
-; RV32I-NEXT: beqz s9, .LBB16_106
-; RV32I-NEXT: .LBB16_105:
+; RV32I-NEXT: beqz s9, .LBB16_114
+; RV32I-NEXT: # %bb.113:
; RV32I-NEXT: mv t1, t4
; RV32I-NEXT: mv t2, a5
-; RV32I-NEXT: .LBB16_106:
-; RV32I-NEXT: bltu a4, a1, .LBB16_108
-; RV32I-NEXT: # %bb.107:
+; RV32I-NEXT: .LBB16_114:
+; RV32I-NEXT: bltu a4, a1, .LBB16_116
+; RV32I-NEXT: # %bb.115:
; RV32I-NEXT: li ra, 0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a7, 0
; RV32I-NEXT: li a6, 0
-; RV32I-NEXT: bnez a4, .LBB16_109
-; RV32I-NEXT: j .LBB16_110
-; RV32I-NEXT: .LBB16_108:
+; RV32I-NEXT: bnez a4, .LBB16_117
+; RV32I-NEXT: j .LBB16_118
+; RV32I-NEXT: .LBB16_116:
; RV32I-NEXT: lw a1, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s5, a1, a5
@@ -7178,13 +7140,13 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: or t2, a1, s8
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: beqz a4, .LBB16_110
-; RV32I-NEXT: .LBB16_109:
+; RV32I-NEXT: beqz a4, .LBB16_118
+; RV32I-NEXT: .LBB16_117:
; RV32I-NEXT: mv s2, s5
; RV32I-NEXT: mv s1, t6
; RV32I-NEXT: mv s6, t1
; RV32I-NEXT: mv a0, t2
-; RV32I-NEXT: .LBB16_110:
+; RV32I-NEXT: .LBB16_118:
; RV32I-NEXT: srli a4, ra, 16
; RV32I-NEXT: lui t2, 16
; RV32I-NEXT: srli t1, ra, 24
@@ -7712,115 +7674,112 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; RV32I-NEXT: # %bb.19:
; RV32I-NEXT: li s2, 0
; RV32I-NEXT: sll a1, t3, s7
-; RV32I-NEXT: mv s4, a5
-; RV32I-NEXT: bnez s7, .LBB17_21
-; RV32I-NEXT: j .LBB17_22
+; RV32I-NEXT: j .LBB17_21
; RV32I-NEXT: .LBB17_20:
; RV32I-NEXT: sll s2, t3, a4
; RV32I-NEXT: srl a1, t3, a1
; RV32I-NEXT: or a1, a1, s5
-; RV32I-NEXT: mv s4, a5
-; RV32I-NEXT: beqz s7, .LBB17_22
; RV32I-NEXT: .LBB17_21:
+; RV32I-NEXT: mv s4, a5
+; RV32I-NEXT: beqz s7, .LBB17_23
+; RV32I-NEXT: # %bb.22:
; RV32I-NEXT: mv s4, a1
-; RV32I-NEXT: .LBB17_22:
+; RV32I-NEXT: .LBB17_23:
; RV32I-NEXT: li a1, 128
-; RV32I-NEXT: bltu a4, s9, .LBB17_24
-; RV32I-NEXT: # %bb.23:
+; RV32I-NEXT: bltu a4, s9, .LBB17_25
+; RV32I-NEXT: # %bb.24:
; RV32I-NEXT: sw zero, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: li a3, 0
-; RV32I-NEXT: j .LBB17_25
-; RV32I-NEXT: .LBB17_24:
+; RV32I-NEXT: j .LBB17_26
+; RV32I-NEXT: .LBB17_25:
; RV32I-NEXT: sw s8, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: or s2, a6, s1
; RV32I-NEXT: or s4, a7, s3
-; RV32I-NEXT: .LBB17_25:
+; RV32I-NEXT: .LBB17_26:
; RV32I-NEXT: sub ra, a1, a4
; RV32I-NEXT: mv a7, t1
; RV32I-NEXT: mv a6, t2
-; RV32I-NEXT: beqz a4, .LBB17_27
-; RV32I-NEXT: # %bb.26:
+; RV32I-NEXT: beqz a4, .LBB17_28
+; RV32I-NEXT: # %bb.27:
; RV32I-NEXT: mv a7, s2
; RV32I-NEXT: mv a6, s4
-; RV32I-NEXT: .LBB17_27:
+; RV32I-NEXT: .LBB17_28:
; RV32I-NEXT: neg s1, ra
; RV32I-NEXT: sll s2, t2, s1
-; RV32I-NEXT: bltu ra, t4, .LBB17_29
-; RV32I-NEXT: # %bb.28:
+; RV32I-NEXT: bltu ra, t4, .LBB17_30
+; RV32I-NEXT: # %bb.29:
; RV32I-NEXT: srl a1, t2, ra
-; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bnez ra, .LBB17_30
; RV32I-NEXT: j .LBB17_31
-; RV32I-NEXT: .LBB17_29:
-; RV32I-NEXT: or a1, s0, s2
-; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: beqz ra, .LBB17_31
; RV32I-NEXT: .LBB17_30:
-; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: or a1, s0, s2
; RV32I-NEXT: .LBB17_31:
-; RV32I-NEXT: bltu ra, t4, .LBB17_33
+; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: beqz ra, .LBB17_33
; RV32I-NEXT: # %bb.32:
+; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .LBB17_33:
+; RV32I-NEXT: bltu ra, t4, .LBB17_35
+; RV32I-NEXT: # %bb.34:
; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT: srl a1, a5, ra
-; RV32I-NEXT: mv t5, t3
-; RV32I-NEXT: bnez ra, .LBB17_34
-; RV32I-NEXT: j .LBB17_35
-; RV32I-NEXT: .LBB17_33:
+; RV32I-NEXT: j .LBB17_36
+; RV32I-NEXT: .LBB17_35:
; RV32I-NEXT: srl a1, t2, s10
; RV32I-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT: sll a1, a5, s1
; RV32I-NEXT: or a1, t5, a1
+; RV32I-NEXT: .LBB17_36:
; RV32I-NEXT: mv t5, t3
-; RV32I-NEXT: beqz ra, .LBB17_35
-; RV32I-NEXT: .LBB17_34:
+; RV32I-NEXT: beqz ra, .LBB17_38
+; RV32I-NEXT: # %bb.37:
; RV32I-NEXT: mv t5, a1
-; RV32I-NEXT: .LBB17_35:
+; RV32I-NEXT: .LBB17_38:
; RV32I-NEXT: sub s3, s9, ra
-; RV32I-NEXT: bltu ra, t4, .LBB17_38
-; RV32I-NEXT: # %bb.36:
+; RV32I-NEXT: bltu ra, t4, .LBB17_41
+; RV32I-NEXT: # %bb.39:
; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bgeu s3, t4, .LBB17_39
-; RV32I-NEXT: .LBB17_37:
+; RV32I-NEXT: bgeu s3, t4, .LBB17_42
+; RV32I-NEXT: .LBB17_40:
; RV32I-NEXT: sll s1, t1, s1
; RV32I-NEXT: neg a1, s3
; RV32I-NEXT: srl a1, t1, a1
; RV32I-NEXT: or a1, a1, s2
-; RV32I-NEXT: j .LBB17_40
-; RV32I-NEXT: .LBB17_38:
+; RV32I-NEXT: j .LBB17_43
+; RV32I-NEXT: .LBB17_41:
; RV32I-NEXT: srl a1, a5, s10
; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s3, t4, .LBB17_37
-; RV32I-NEXT: .LBB17_39:
+; RV32I-NEXT: bltu s3, t4, .LBB17_40
+; RV32I-NEXT: .LBB17_42:
; RV32I-NEXT: li s1, 0
; RV32I-NEXT: sll a1, t1, s3
-; RV32I-NEXT: .LBB17_40:
+; RV32I-NEXT: .LBB17_43:
; RV32I-NEXT: addi s4, ra, -64
; RV32I-NEXT: mv s2, t2
-; RV32I-NEXT: beqz s3, .LBB17_42
-; RV32I-NEXT: # %bb.41:
+; RV32I-NEXT: beqz s3, .LBB17_45
+; RV32I-NEXT: # %bb.44:
; RV32I-NEXT: mv s2, a1
-; RV32I-NEXT: .LBB17_42:
+; RV32I-NEXT: .LBB17_45:
; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s5, a7
-; RV32I-NEXT: bltu s4, t4, .LBB17_44
-; RV32I-NEXT: # %bb.43:
+; RV32I-NEXT: bltu s4, t4, .LBB17_47
+; RV32I-NEXT: # %bb.46:
; RV32I-NEXT: srl t0, t2, s4
-; RV32I-NEXT: j .LBB17_45
-; RV32I-NEXT: .LBB17_44:
+; RV32I-NEXT: j .LBB17_48
+; RV32I-NEXT: .LBB17_47:
; RV32I-NEXT: srl a1, t1, ra
; RV32I-NEXT: neg t0, s4
; RV32I-NEXT: sll t0, t2, t0
; RV32I-NEXT: or t0, a1, t0
-; RV32I-NEXT: .LBB17_45:
+; RV32I-NEXT: .LBB17_48:
; RV32I-NEXT: mv s0, s10
; RV32I-NEXT: mv a7, a6
; RV32I-NEXT: lbu s8, 19(a0)
; RV32I-NEXT: lbu a1, 23(a0)
; RV32I-NEXT: mv s3, t1
-; RV32I-NEXT: beqz s4, .LBB17_47
-; RV32I-NEXT: # %bb.46:
+; RV32I-NEXT: beqz s4, .LBB17_50
+; RV32I-NEXT: # %bb.49:
; RV32I-NEXT: mv s3, t0
-; RV32I-NEXT: .LBB17_47:
+; RV32I-NEXT: .LBB17_50:
; RV32I-NEXT: mv a6, a3
; RV32I-NEXT: lbu s10, 17(a0)
; RV32I-NEXT: lbu t0, 18(a0)
@@ -7829,25 +7788,25 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; RV32I-NEXT: slli s8, s8, 8
; RV32I-NEXT: slli a1, a1, 8
; RV32I-NEXT: li a3, 64
-; RV32I-NEXT: bltu s4, t4, .LBB17_49
-; RV32I-NEXT: # %bb.48:
+; RV32I-NEXT: bltu s4, t4, .LBB17_52
+; RV32I-NEXT: # %bb.51:
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: j .LBB17_50
-; RV32I-NEXT: .LBB17_49:
+; RV32I-NEXT: j .LBB17_53
+; RV32I-NEXT: .LBB17_52:
; RV32I-NEXT: srl s4, t2, ra
-; RV32I-NEXT: .LBB17_50:
+; RV32I-NEXT: .LBB17_53:
; RV32I-NEXT: or s11, s8, t0
; RV32I-NEXT: lbu t0, 16(a0)
; RV32I-NEXT: lbu s8, 20(a0)
; RV32I-NEXT: slli s10, s10, 8
; RV32I-NEXT: slli s9, s9, 8
; RV32I-NEXT: or t6, a1, t6
-; RV32I-NEXT: bgeu ra, a3, .LBB17_52
-; RV32I-NEXT: # %bb.51:
+; RV32I-NEXT: bgeu ra, a3, .LBB17_55
+; RV32I-NEXT: # %bb.54:
; RV32I-NEXT: or s3, t5, s1
; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s4, a1, s2
-; RV32I-NEXT: .LBB17_52:
+; RV32I-NEXT: .LBB17_55:
; RV32I-NEXT: or a1, s10, t0
; RV32I-NEXT: slli s11, s11, 16
; RV32I-NEXT: or t0, s9, s8
@@ -7855,58 +7814,58 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; RV32I-NEXT: mv t5, t3
; RV32I-NEXT: mv s1, a5
; RV32I-NEXT: mv a3, a6
-; RV32I-NEXT: beqz ra, .LBB17_54
-; RV32I-NEXT: # %bb.53:
+; RV32I-NEXT: beqz ra, .LBB17_57
+; RV32I-NEXT: # %bb.56:
; RV32I-NEXT: mv t5, s3
; RV32I-NEXT: mv s1, s4
-; RV32I-NEXT: .LBB17_54:
+; RV32I-NEXT: .LBB17_57:
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: or s2, s11, a1
; RV32I-NEXT: or s1, t6, t0
; RV32I-NEXT: li a1, 64
; RV32I-NEXT: mv a6, a7
; RV32I-NEXT: mv a7, s0
-; RV32I-NEXT: bltu ra, a1, .LBB17_56
-; RV32I-NEXT: # %bb.55:
+; RV32I-NEXT: bltu ra, a1, .LBB17_59
+; RV32I-NEXT: # %bb.58:
; RV32I-NEXT: sw zero, 40(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .LBB17_56:
+; RV32I-NEXT: .LBB17_59:
; RV32I-NEXT: srl s3, s2, a7
; RV32I-NEXT: sll ra, s1, a4
; RV32I-NEXT: mv a7, s5
; RV32I-NEXT: sw t5, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu a4, t4, .LBB17_58
-; RV32I-NEXT: # %bb.57:
+; RV32I-NEXT: bltu a4, t4, .LBB17_61
+; RV32I-NEXT: # %bb.60:
; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: sll a1, s2, a4
-; RV32I-NEXT: j .LBB17_59
-; RV32I-NEXT: .LBB17_58:
+; RV32I-NEXT: j .LBB17_62
+; RV32I-NEXT: .LBB17_61:
; RV32I-NEXT: sll a1, s2, a4
; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: or a1, s3, ra
-; RV32I-NEXT: .LBB17_59:
+; RV32I-NEXT: .LBB17_62:
; RV32I-NEXT: lbu s9, 27(a0)
; RV32I-NEXT: lbu t6, 31(a0)
; RV32I-NEXT: mv t5, s1
-; RV32I-NEXT: beqz a4, .LBB17_61
-; RV32I-NEXT: # %bb.60:
+; RV32I-NEXT: beqz a4, .LBB17_64
+; RV32I-NEXT: # %bb.63:
; RV32I-NEXT: mv t5, a1
-; RV32I-NEXT: .LBB17_61:
+; RV32I-NEXT: .LBB17_64:
; RV32I-NEXT: lbu s8, 25(a0)
; RV32I-NEXT: lbu s4, 26(a0)
; RV32I-NEXT: lbu s11, 29(a0)
; RV32I-NEXT: lbu s10, 30(a0)
; RV32I-NEXT: slli s9, s9, 8
; RV32I-NEXT: slli t6, t6, 8
-; RV32I-NEXT: bltu s6, t4, .LBB17_63
-; RV32I-NEXT: # %bb.62:
+; RV32I-NEXT: bltu s6, t4, .LBB17_66
+; RV32I-NEXT: # %bb.65:
; RV32I-NEXT: srl t0, s1, s6
-; RV32I-NEXT: j .LBB17_64
-; RV32I-NEXT: .LBB17_63:
+; RV32I-NEXT: j .LBB17_67
+; RV32I-NEXT: .LBB17_66:
; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: sll a1, s1, a1
; RV32I-NEXT: or t0, s3, a1
-; RV32I-NEXT: .LBB17_64:
+; RV32I-NEXT: .LBB17_67:
; RV32I-NEXT: slli s8, s8, 8
; RV32I-NEXT: lbu s3, 24(a0)
; RV32I-NEXT: lbu a1, 28(a0)
@@ -7914,174 +7873,170 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; RV32I-NEXT: slli s11, s11, 8
; RV32I-NEXT: or t6, t6, s10
; RV32I-NEXT: mv s9, s2
-; RV32I-NEXT: beqz s6, .LBB17_66
-; RV32I-NEXT: # %bb.65:
+; RV32I-NEXT: beqz s6, .LBB17_69
+; RV32I-NEXT: # %bb.68:
; RV32I-NEXT: mv s9, t0
-; RV32I-NEXT: .LBB17_66:
+; RV32I-NEXT: .LBB17_69:
; RV32I-NEXT: or a0, s8, s3
; RV32I-NEXT: slli t0, s4, 16
; RV32I-NEXT: or a1, s11, a1
; RV32I-NEXT: slli t6, t6, 16
-; RV32I-NEXT: bltu s6, t4, .LBB17_68
-; RV32I-NEXT: # %bb.67:
+; RV32I-NEXT: bltu s6, t4, .LBB17_71
+; RV32I-NEXT: # %bb.70:
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: j .LBB17_69
-; RV32I-NEXT: .LBB17_68:
+; RV32I-NEXT: j .LBB17_72
+; RV32I-NEXT: .LBB17_71:
; RV32I-NEXT: srl s4, s1, s0
-; RV32I-NEXT: .LBB17_69:
+; RV32I-NEXT: .LBB17_72:
; RV32I-NEXT: li s11, 64
; RV32I-NEXT: or s6, t0, a0
; RV32I-NEXT: or a0, t6, a1
-; RV32I-NEXT: bltu a4, t4, .LBB17_71
-; RV32I-NEXT: # %bb.70:
+; RV32I-NEXT: bltu a4, t4, .LBB17_74
+; RV32I-NEXT: # %bb.73:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a1, s6, a4
-; RV32I-NEXT: mv s10, a0
-; RV32I-NEXT: bnez a4, .LBB17_72
-; RV32I-NEXT: j .LBB17_73
-; RV32I-NEXT: .LBB17_71:
+; RV32I-NEXT: j .LBB17_75
+; RV32I-NEXT: .LBB17_74:
; RV32I-NEXT: sll s3, s6, a4
; RV32I-NEXT: srl a1, s6, s0
; RV32I-NEXT: sll t0, a0, a4
; RV32I-NEXT: or a1, a1, t0
+; RV32I-NEXT: .LBB17_75:
; RV32I-NEXT: mv s10, a0
-; RV32I-NEXT: beqz a4, .LBB17_73
-; RV32I-NEXT: .LBB17_72:
+; RV32I-NEXT: beqz a4, .LBB17_77
+; RV32I-NEXT: # %bb.76:
; RV32I-NEXT: mv s10, a1
-; RV32I-NEXT: .LBB17_73:
-; RV32I-NEXT: bltu s7, t4, .LBB17_75
-; RV32I-NEXT: # %bb.74:
+; RV32I-NEXT: .LBB17_77:
+; RV32I-NEXT: bltu s7, t4, .LBB17_79
+; RV32I-NEXT: # %bb.78:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: sll a1, s2, s7
-; RV32I-NEXT: mv s0, s1
-; RV32I-NEXT: bnez s7, .LBB17_76
-; RV32I-NEXT: j .LBB17_77
-; RV32I-NEXT: .LBB17_75:
+; RV32I-NEXT: j .LBB17_80
+; RV32I-NEXT: .LBB17_79:
; RV32I-NEXT: sll s5, s2, a4
; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: srl a1, s2, a1
; RV32I-NEXT: or a1, a1, ra
+; RV32I-NEXT: .LBB17_80:
; RV32I-NEXT: mv s0, s1
-; RV32I-NEXT: beqz s7, .LBB17_77
-; RV32I-NEXT: .LBB17_76:
+; RV32I-NEXT: beqz s7, .LBB17_82
+; RV32I-NEXT: # %bb.81:
; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: .LBB17_77:
-; RV32I-NEXT: bltu a4, s11, .LBB17_79
-; RV32I-NEXT: # %bb.78:
+; RV32I-NEXT: .LBB17_82:
+; RV32I-NEXT: bltu a4, s11, .LBB17_84
+; RV32I-NEXT: # %bb.83:
; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: li t5, 0
-; RV32I-NEXT: j .LBB17_80
-; RV32I-NEXT: .LBB17_79:
+; RV32I-NEXT: j .LBB17_85
+; RV32I-NEXT: .LBB17_84:
; RV32I-NEXT: or s5, s9, s3
; RV32I-NEXT: or s0, s4, s10
-; RV32I-NEXT: .LBB17_80:
+; RV32I-NEXT: .LBB17_85:
; RV32I-NEXT: addi s9, a4, -128
; RV32I-NEXT: mv s7, s6
; RV32I-NEXT: mv s8, a0
-; RV32I-NEXT: beqz a4, .LBB17_82
-; RV32I-NEXT: # %bb.81:
+; RV32I-NEXT: beqz a4, .LBB17_87
+; RV32I-NEXT: # %bb.86:
; RV32I-NEXT: mv s7, s5
; RV32I-NEXT: mv s8, s0
-; RV32I-NEXT: .LBB17_82:
+; RV32I-NEXT: .LBB17_87:
; RV32I-NEXT: neg s3, s9
; RV32I-NEXT: srl s0, t3, s3
-; RV32I-NEXT: bltu s9, t4, .LBB17_84
-; RV32I-NEXT: # %bb.83:
+; RV32I-NEXT: bltu s9, t4, .LBB17_89
+; RV32I-NEXT: # %bb.88:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: sll a1, t3, s9
-; RV32I-NEXT: j .LBB17_85
-; RV32I-NEXT: .LBB17_84:
+; RV32I-NEXT: j .LBB17_90
+; RV32I-NEXT: .LBB17_89:
; RV32I-NEXT: sll s5, t3, a4
; RV32I-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a1, s0, a1
-; RV32I-NEXT: .LBB17_85:
+; RV32I-NEXT: .LBB17_90:
; RV32I-NEXT: sub s4, s11, s9
; RV32I-NEXT: mv t6, a5
-; RV32I-NEXT: beqz s9, .LBB17_87
-; RV32I-NEXT: # %bb.86:
+; RV32I-NEXT: beqz s9, .LBB17_92
+; RV32I-NEXT: # %bb.91:
; RV32I-NEXT: mv t6, a1
-; RV32I-NEXT: .LBB17_87:
-; RV32I-NEXT: bltu s4, t4, .LBB17_89
-; RV32I-NEXT: # %bb.88:
+; RV32I-NEXT: .LBB17_92:
+; RV32I-NEXT: bltu s4, t4, .LBB17_94
+; RV32I-NEXT: # %bb.93:
; RV32I-NEXT: srl a1, a5, s4
-; RV32I-NEXT: mv s0, t3
-; RV32I-NEXT: bnez s4, .LBB17_90
-; RV32I-NEXT: j .LBB17_91
-; RV32I-NEXT: .LBB17_89:
+; RV32I-NEXT: j .LBB17_95
+; RV32I-NEXT: .LBB17_94:
; RV32I-NEXT: neg a1, s4
; RV32I-NEXT: sll a1, a5, a1
; RV32I-NEXT: or a1, s0, a1
+; RV32I-NEXT: .LBB17_95:
; RV32I-NEXT: mv s0, t3
-; RV32I-NEXT: beqz s4, .LBB17_91
-; RV32I-NEXT: .LBB17_90:
+; RV32I-NEXT: beqz s4, .LBB17_97
+; RV32I-NEXT: # %bb.96:
; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: .LBB17_91:
-; RV32I-NEXT: bltu s4, t4, .LBB17_94
-; RV32I-NEXT: # %bb.92:
+; RV32I-NEXT: .LBB17_97:
+; RV32I-NEXT: bltu s4, t4, .LBB17_101
+; RV32I-NEXT: # %bb.98:
; RV32I-NEXT: li s4, 0
+; RV32I-NEXT: .LBB17_99:
; RV32I-NEXT: li ra, 64
-; RV32I-NEXT: bgeu s9, t4, .LBB17_95
-; RV32I-NEXT: .LBB17_93:
+; RV32I-NEXT: bgeu s9, t4, .LBB17_102
+; RV32I-NEXT: # %bb.100:
; RV32I-NEXT: sll s10, t1, a4
; RV32I-NEXT: srl a1, t1, s3
; RV32I-NEXT: lw t0, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a1, a1, t0
-; RV32I-NEXT: j .LBB17_96
-; RV32I-NEXT: .LBB17_94:
+; RV32I-NEXT: j .LBB17_103
+; RV32I-NEXT: .LBB17_101:
; RV32I-NEXT: srl s4, a5, s3
-; RV32I-NEXT: li ra, 64
-; RV32I-NEXT: bltu s9, t4, .LBB17_93
-; RV32I-NEXT: .LBB17_95:
+; RV32I-NEXT: j .LBB17_99
+; RV32I-NEXT: .LBB17_102:
; RV32I-NEXT: li s10, 0
; RV32I-NEXT: sll a1, t1, s9
-; RV32I-NEXT: .LBB17_96:
+; RV32I-NEXT: .LBB17_103:
; RV32I-NEXT: addi s11, s9, -64
; RV32I-NEXT: mv s3, t2
-; RV32I-NEXT: beqz s9, .LBB17_98
-; RV32I-NEXT: # %bb.97:
+; RV32I-NEXT: beqz s9, .LBB17_105
+; RV32I-NEXT: # %bb.104:
; RV32I-NEXT: mv s3, a1
-; RV32I-NEXT: .LBB17_98:
-; RV32I-NEXT: bltu s11, t4, .LBB17_100
-; RV32I-NEXT: # %bb.99:
+; RV32I-NEXT: .LBB17_105:
+; RV32I-NEXT: bltu s11, t4, .LBB17_107
+; RV32I-NEXT: # %bb.106:
; RV32I-NEXT: li t4, 0
; RV32I-NEXT: sll a1, t3, s11
-; RV32I-NEXT: bnez s11, .LBB17_101
-; RV32I-NEXT: j .LBB17_102
-; RV32I-NEXT: .LBB17_100:
+; RV32I-NEXT: bnez s11, .LBB17_108
+; RV32I-NEXT: j .LBB17_109
+; RV32I-NEXT: .LBB17_107:
; RV32I-NEXT: sll t4, t3, s9
; RV32I-NEXT: neg a1, s11
; RV32I-NEXT: srl a1, t3, a1
; RV32I-NEXT: sll t0, a5, s9
; RV32I-NEXT: or a1, a1, t0
-; RV32I-NEXT: beqz s11, .LBB17_102
-; RV32I-NEXT: .LBB17_101:
+; RV32I-NEXT: beqz s11, .LBB17_109
+; RV32I-NEXT: .LBB17_108:
; RV32I-NEXT: mv a5, a1
-; RV32I-NEXT: .LBB17_102:
-; RV32I-NEXT: bltu s9, ra, .LBB17_104
-; RV32I-NEXT: # %bb.103:
+; RV32I-NEXT: .LBB17_109:
+; RV32I-NEXT: bltu s9, ra, .LBB17_111
+; RV32I-NEXT: # %bb.110:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: li t6, 0
-; RV32I-NEXT: li a1, 128
-; RV32I-NEXT: bnez s9, .LBB17_105
-; RV32I-NEXT: j .LBB17_106
-; RV32I-NEXT: .LBB17_104:
+; RV32I-NEXT: j .LBB17_112
+; RV32I-NEXT: .LBB17_111:
; RV32I-NEXT: or t4, s0, s10
; RV32I-NEXT: or a5, s4, s3
+; RV32I-NEXT: .LBB17_112:
; RV32I-NEXT: li a1, 128
-; RV32I-NEXT: beqz s9, .LBB17_106
-; RV32I-NEXT: .LBB17_105:
+; RV32I-NEXT: beqz s9, .LBB17_114
+; RV32I-NEXT: # %bb.113:
; RV32I-NEXT: mv t1, t4
; RV32I-NEXT: mv t2, a5
-; RV32I-NEXT: .LBB17_106:
-; RV32I-NEXT: bltu a4, a1, .LBB17_108
-; RV32I-NEXT: # %bb.107:
+; RV32I-NEXT: .LBB17_114:
+; RV32I-NEXT: bltu a4, a1, .LBB17_116
+; RV32I-NEXT: # %bb.115:
; RV32I-NEXT: li ra, 0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a7, 0
; RV32I-NEXT: li a6, 0
-; RV32I-NEXT: bnez a4, .LBB17_109
-; RV32I-NEXT: j .LBB17_110
-; RV32I-NEXT: .LBB17_108:
+; RV32I-NEXT: bnez a4, .LBB17_117
+; RV32I-NEXT: j .LBB17_118
+; RV32I-NEXT: .LBB17_116:
; RV32I-NEXT: lw a1, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s5, a1, a5
@@ -8092,13 +8047,13 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: or t2, a1, s8
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: beqz a4, .LBB17_110
-; RV32I-NEXT: .LBB17_109:
+; RV32I-NEXT: beqz a4, .LBB17_118
+; RV32I-NEXT: .LBB17_117:
; RV32I-NEXT: mv s2, s5
; RV32I-NEXT: mv s1, t6
; RV32I-NEXT: mv s6, t1
; RV32I-NEXT: mv a0, t2
-; RV32I-NEXT: .LBB17_110:
+; RV32I-NEXT: .LBB17_118:
; RV32I-NEXT: srli a4, ra, 16
; RV32I-NEXT: lui t2, 16
; RV32I-NEXT: srli t1, ra, 24
@@ -8726,276 +8681,268 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: # %bb.38:
; RV32I-NEXT: li s4, 0
; RV32I-NEXT: srl a3, a0, a5
-; RV32I-NEXT: mv a6, s0
-; RV32I-NEXT: bnez a5, .LBB18_40
-; RV32I-NEXT: j .LBB18_41
+; RV32I-NEXT: j .LBB18_40
; RV32I-NEXT: .LBB18_39:
; RV32I-NEXT: srl s4, t3, a5
; RV32I-NEXT: srl a3, s0, a5
; RV32I-NEXT: sll a6, a0, s6
; RV32I-NEXT: or a3, a3, a6
-; RV32I-NEXT: mv a6, s0
-; RV32I-NEXT: beqz a5, .LBB18_41
; RV32I-NEXT: .LBB18_40:
+; RV32I-NEXT: mv a6, s0
+; RV32I-NEXT: beqz a5, .LBB18_42
+; RV32I-NEXT: # %bb.41:
; RV32I-NEXT: mv a6, a3
-; RV32I-NEXT: .LBB18_41:
-; RV32I-NEXT: bltu a5, t5, .LBB18_44
-; RV32I-NEXT: # %bb.42:
+; RV32I-NEXT: .LBB18_42:
+; RV32I-NEXT: bltu a5, t5, .LBB18_45
+; RV32I-NEXT: # %bb.43:
; RV32I-NEXT: li s1, 0
-; RV32I-NEXT: bgeu s7, t5, .LBB18_45
-; RV32I-NEXT: .LBB18_43:
+; RV32I-NEXT: bgeu s7, t5, .LBB18_46
+; RV32I-NEXT: .LBB18_44:
; RV32I-NEXT: sll s3, t4, s6
; RV32I-NEXT: srl a3, t4, s10
; RV32I-NEXT: or a3, a3, ra
-; RV32I-NEXT: mv s10, t3
-; RV32I-NEXT: bnez s7, .LBB18_46
; RV32I-NEXT: j .LBB18_47
-; RV32I-NEXT: .LBB18_44:
-; RV32I-NEXT: srl s1, a0, a5
-; RV32I-NEXT: bltu s7, t5, .LBB18_43
; RV32I-NEXT: .LBB18_45:
+; RV32I-NEXT: srl s1, a0, a5
+; RV32I-NEXT: bltu s7, t5, .LBB18_44
+; RV32I-NEXT: .LBB18_46:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t4, s7
-; RV32I-NEXT: mv s10, t3
-; RV32I-NEXT: beqz s7, .LBB18_47
-; RV32I-NEXT: .LBB18_46:
-; RV32I-NEXT: mv s10, a3
; RV32I-NEXT: .LBB18_47:
-; RV32I-NEXT: bltu s9, t5, .LBB18_49
+; RV32I-NEXT: mv s10, t3
+; RV32I-NEXT: beqz s7, .LBB18_49
; RV32I-NEXT: # %bb.48:
-; RV32I-NEXT: srl a3, t3, s9
-; RV32I-NEXT: mv s2, t4
-; RV32I-NEXT: bnez s9, .LBB18_50
-; RV32I-NEXT: j .LBB18_51
+; RV32I-NEXT: mv s10, a3
; RV32I-NEXT: .LBB18_49:
+; RV32I-NEXT: bltu s9, t5, .LBB18_51
+; RV32I-NEXT: # %bb.50:
+; RV32I-NEXT: srl a3, t3, s9
+; RV32I-NEXT: j .LBB18_52
+; RV32I-NEXT: .LBB18_51:
; RV32I-NEXT: sll a3, t3, s11
; RV32I-NEXT: or a3, s2, a3
+; RV32I-NEXT: .LBB18_52:
; RV32I-NEXT: mv s2, t4
-; RV32I-NEXT: beqz s9, .LBB18_51
-; RV32I-NEXT: .LBB18_50:
+; RV32I-NEXT: beqz s9, .LBB18_54
+; RV32I-NEXT: # %bb.53:
; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: .LBB18_51:
-; RV32I-NEXT: bltu s9, t5, .LBB18_53
-; RV32I-NEXT: # %bb.52:
+; RV32I-NEXT: .LBB18_54:
+; RV32I-NEXT: bltu s9, t5, .LBB18_56
+; RV32I-NEXT: # %bb.55:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: bltu a5, t6, .LBB18_54
-; RV32I-NEXT: j .LBB18_55
-; RV32I-NEXT: .LBB18_53:
+; RV32I-NEXT: bltu a5, t6, .LBB18_57
+; RV32I-NEXT: j .LBB18_58
+; RV32I-NEXT: .LBB18_56:
; RV32I-NEXT: srl s7, t3, a5
-; RV32I-NEXT: bgeu a5, t6, .LBB18_55
-; RV32I-NEXT: .LBB18_54:
+; RV32I-NEXT: bgeu a5, t6, .LBB18_58
+; RV32I-NEXT: .LBB18_57:
; RV32I-NEXT: or s2, a6, s3
; RV32I-NEXT: or s7, s1, s10
-; RV32I-NEXT: .LBB18_55:
+; RV32I-NEXT: .LBB18_58:
; RV32I-NEXT: li a3, 128
; RV32I-NEXT: mv a6, s0
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: beqz a5, .LBB18_57
-; RV32I-NEXT: # %bb.56:
+; RV32I-NEXT: beqz a5, .LBB18_60
+; RV32I-NEXT: # %bb.59:
; RV32I-NEXT: mv a6, s2
; RV32I-NEXT: mv s1, s7
-; RV32I-NEXT: .LBB18_57:
+; RV32I-NEXT: .LBB18_60:
; RV32I-NEXT: sw a6, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sub s2, a3, a5
-; RV32I-NEXT: bltu a5, t6, .LBB18_59
-; RV32I-NEXT: # %bb.58:
+; RV32I-NEXT: bltu a5, t6, .LBB18_62
+; RV32I-NEXT: # %bb.61:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: .LBB18_59:
+; RV32I-NEXT: .LBB18_62:
; RV32I-NEXT: neg s3, s2
; RV32I-NEXT: srl a6, t1, s3
; RV32I-NEXT: sw s4, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s2, t5, .LBB18_61
-; RV32I-NEXT: # %bb.60:
+; RV32I-NEXT: bltu s2, t5, .LBB18_64
+; RV32I-NEXT: # %bb.63:
; RV32I-NEXT: li s11, 0
; RV32I-NEXT: sll a3, t1, s2
-; RV32I-NEXT: j .LBB18_62
-; RV32I-NEXT: .LBB18_61:
+; RV32I-NEXT: j .LBB18_65
+; RV32I-NEXT: .LBB18_64:
; RV32I-NEXT: sll s11, t1, s6
; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a6, a3
-; RV32I-NEXT: .LBB18_62:
+; RV32I-NEXT: .LBB18_65:
; RV32I-NEXT: sub s1, t6, s2
; RV32I-NEXT: mv s8, a7
-; RV32I-NEXT: beqz s2, .LBB18_64
-; RV32I-NEXT: # %bb.63:
+; RV32I-NEXT: beqz s2, .LBB18_67
+; RV32I-NEXT: # %bb.66:
; RV32I-NEXT: mv s8, a3
-; RV32I-NEXT: .LBB18_64:
-; RV32I-NEXT: bltu s1, t5, .LBB18_66
-; RV32I-NEXT: # %bb.65:
+; RV32I-NEXT: .LBB18_67:
+; RV32I-NEXT: bltu s1, t5, .LBB18_69
+; RV32I-NEXT: # %bb.68:
; RV32I-NEXT: srl a3, a7, s1
-; RV32I-NEXT: mv a6, t1
-; RV32I-NEXT: bnez s1, .LBB18_67
-; RV32I-NEXT: j .LBB18_68
-; RV32I-NEXT: .LBB18_66:
+; RV32I-NEXT: j .LBB18_70
+; RV32I-NEXT: .LBB18_69:
; RV32I-NEXT: neg a3, s1
; RV32I-NEXT: sll a3, a7, a3
; RV32I-NEXT: or a3, a6, a3
+; RV32I-NEXT: .LBB18_70:
; RV32I-NEXT: mv a6, t1
-; RV32I-NEXT: beqz s1, .LBB18_68
-; RV32I-NEXT: .LBB18_67:
+; RV32I-NEXT: beqz s1, .LBB18_72
+; RV32I-NEXT: # %bb.71:
; RV32I-NEXT: mv a6, a3
-; RV32I-NEXT: .LBB18_68:
-; RV32I-NEXT: bltu s1, t5, .LBB18_71
-; RV32I-NEXT: # %bb.69:
+; RV32I-NEXT: .LBB18_72:
+; RV32I-NEXT: bltu s1, t5, .LBB18_76
+; RV32I-NEXT: # %bb.73:
; RV32I-NEXT: li s1, 0
+; RV32I-NEXT: .LBB18_74:
; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bgeu s2, t5, .LBB18_72
-; RV32I-NEXT: .LBB18_70:
+; RV32I-NEXT: bgeu s2, t5, .LBB18_77
+; RV32I-NEXT: # %bb.75:
; RV32I-NEXT: sll s6, t2, s6
; RV32I-NEXT: srl a3, t2, s3
; RV32I-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: j .LBB18_73
-; RV32I-NEXT: .LBB18_71:
+; RV32I-NEXT: j .LBB18_78
+; RV32I-NEXT: .LBB18_76:
; RV32I-NEXT: srl s1, a7, s3
-; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s2, t5, .LBB18_70
-; RV32I-NEXT: .LBB18_72:
+; RV32I-NEXT: j .LBB18_74
+; RV32I-NEXT: .LBB18_77:
; RV32I-NEXT: li s6, 0
; RV32I-NEXT: sll a3, t2, s2
-; RV32I-NEXT: .LBB18_73:
+; RV32I-NEXT: .LBB18_78:
; RV32I-NEXT: addi s9, s2, -64
; RV32I-NEXT: mv s5, a4
-; RV32I-NEXT: beqz s2, .LBB18_75
-; RV32I-NEXT: # %bb.74:
+; RV32I-NEXT: beqz s2, .LBB18_80
+; RV32I-NEXT: # %bb.79:
; RV32I-NEXT: mv s5, a3
-; RV32I-NEXT: .LBB18_75:
-; RV32I-NEXT: bltu s9, t5, .LBB18_77
-; RV32I-NEXT: # %bb.76:
+; RV32I-NEXT: .LBB18_80:
+; RV32I-NEXT: bltu s9, t5, .LBB18_82
+; RV32I-NEXT: # %bb.81:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t1, s9
-; RV32I-NEXT: mv s7, a7
-; RV32I-NEXT: bnez s9, .LBB18_78
-; RV32I-NEXT: j .LBB18_79
-; RV32I-NEXT: .LBB18_77:
+; RV32I-NEXT: j .LBB18_83
+; RV32I-NEXT: .LBB18_82:
; RV32I-NEXT: sll s3, t1, s2
; RV32I-NEXT: neg a3, s9
; RV32I-NEXT: srl a3, t1, a3
; RV32I-NEXT: sll s4, a7, s2
; RV32I-NEXT: or a3, a3, s4
+; RV32I-NEXT: .LBB18_83:
; RV32I-NEXT: mv s7, a7
-; RV32I-NEXT: beqz s9, .LBB18_79
-; RV32I-NEXT: .LBB18_78:
+; RV32I-NEXT: beqz s9, .LBB18_85
+; RV32I-NEXT: # %bb.84:
; RV32I-NEXT: mv s7, a3
-; RV32I-NEXT: .LBB18_79:
-; RV32I-NEXT: bltu s2, t6, .LBB18_81
-; RV32I-NEXT: # %bb.80:
+; RV32I-NEXT: .LBB18_85:
+; RV32I-NEXT: bltu s2, t6, .LBB18_87
+; RV32I-NEXT: # %bb.86:
; RV32I-NEXT: li s11, 0
; RV32I-NEXT: li s8, 0
-; RV32I-NEXT: j .LBB18_82
-; RV32I-NEXT: .LBB18_81:
+; RV32I-NEXT: j .LBB18_88
+; RV32I-NEXT: .LBB18_87:
; RV32I-NEXT: or s3, a6, s6
; RV32I-NEXT: or s7, s1, s5
-; RV32I-NEXT: .LBB18_82:
+; RV32I-NEXT: .LBB18_88:
; RV32I-NEXT: addi ra, a5, -128
; RV32I-NEXT: mv s4, t2
; RV32I-NEXT: mv s6, a4
-; RV32I-NEXT: beqz s2, .LBB18_84
-; RV32I-NEXT: # %bb.83:
+; RV32I-NEXT: beqz s2, .LBB18_90
+; RV32I-NEXT: # %bb.89:
; RV32I-NEXT: mv s4, s3
; RV32I-NEXT: mv s6, s7
-; RV32I-NEXT: .LBB18_84:
+; RV32I-NEXT: .LBB18_90:
; RV32I-NEXT: neg s9, ra
; RV32I-NEXT: sll s3, a4, s9
-; RV32I-NEXT: bltu ra, t5, .LBB18_86
-; RV32I-NEXT: # %bb.85:
+; RV32I-NEXT: bltu ra, t5, .LBB18_92
+; RV32I-NEXT: # %bb.91:
; RV32I-NEXT: sra a3, a4, ra
-; RV32I-NEXT: mv s1, t2
-; RV32I-NEXT: bnez ra, .LBB18_87
-; RV32I-NEXT: j .LBB18_88
-; RV32I-NEXT: .LBB18_86:
+; RV32I-NEXT: j .LBB18_93
+; RV32I-NEXT: .LBB18_92:
; RV32I-NEXT: lw a3, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
+; RV32I-NEXT: .LBB18_93:
; RV32I-NEXT: mv s1, t2
-; RV32I-NEXT: beqz ra, .LBB18_88
-; RV32I-NEXT: .LBB18_87:
+; RV32I-NEXT: beqz ra, .LBB18_95
+; RV32I-NEXT: # %bb.94:
; RV32I-NEXT: mv s1, a3
-; RV32I-NEXT: .LBB18_88:
-; RV32I-NEXT: bltu ra, t5, .LBB18_90
-; RV32I-NEXT: # %bb.89:
+; RV32I-NEXT: .LBB18_95:
+; RV32I-NEXT: bltu ra, t5, .LBB18_97
+; RV32I-NEXT: # %bb.96:
; RV32I-NEXT: srai s2, a4, 31
; RV32I-NEXT: srl a3, a7, ra
-; RV32I-NEXT: mv a6, t1
-; RV32I-NEXT: bnez ra, .LBB18_91
-; RV32I-NEXT: j .LBB18_92
-; RV32I-NEXT: .LBB18_90:
+; RV32I-NEXT: j .LBB18_98
+; RV32I-NEXT: .LBB18_97:
; RV32I-NEXT: sra s2, a4, a5
; RV32I-NEXT: sll a3, a7, s9
; RV32I-NEXT: lw a6, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a6, a3
+; RV32I-NEXT: .LBB18_98:
; RV32I-NEXT: mv a6, t1
-; RV32I-NEXT: beqz ra, .LBB18_92
-; RV32I-NEXT: .LBB18_91:
+; RV32I-NEXT: beqz ra, .LBB18_100
+; RV32I-NEXT: # %bb.99:
; RV32I-NEXT: mv a6, a3
-; RV32I-NEXT: .LBB18_92:
+; RV32I-NEXT: .LBB18_100:
; RV32I-NEXT: mv s5, t0
; RV32I-NEXT: sub s10, t6, ra
; RV32I-NEXT: li t0, 64
-; RV32I-NEXT: bltu ra, t5, .LBB18_94
-; RV32I-NEXT: # %bb.93:
+; RV32I-NEXT: bltu ra, t5, .LBB18_102
+; RV32I-NEXT: # %bb.101:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: j .LBB18_95
-; RV32I-NEXT: .LBB18_94:
+; RV32I-NEXT: j .LBB18_103
+; RV32I-NEXT: .LBB18_102:
; RV32I-NEXT: srl s7, a7, a5
-; RV32I-NEXT: .LBB18_95:
+; RV32I-NEXT: .LBB18_103:
; RV32I-NEXT: mv t6, s8
; RV32I-NEXT: mv s8, s11
-; RV32I-NEXT: bltu s10, t5, .LBB18_97
-; RV32I-NEXT: # %bb.96:
+; RV32I-NEXT: bltu s10, t5, .LBB18_105
+; RV32I-NEXT: # %bb.104:
; RV32I-NEXT: li s9, 0
; RV32I-NEXT: sll a3, t2, s10
-; RV32I-NEXT: j .LBB18_98
-; RV32I-NEXT: .LBB18_97:
+; RV32I-NEXT: j .LBB18_106
+; RV32I-NEXT: .LBB18_105:
; RV32I-NEXT: sll s9, t2, s9
; RV32I-NEXT: neg a3, s10
; RV32I-NEXT: srl a3, t2, a3
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: .LBB18_98:
+; RV32I-NEXT: .LBB18_106:
; RV32I-NEXT: addi s11, ra, -64
; RV32I-NEXT: mv s3, a4
-; RV32I-NEXT: beqz s10, .LBB18_100
-; RV32I-NEXT: # %bb.99:
+; RV32I-NEXT: beqz s10, .LBB18_108
+; RV32I-NEXT: # %bb.107:
; RV32I-NEXT: mv s3, a3
-; RV32I-NEXT: .LBB18_100:
-; RV32I-NEXT: bltu s11, t5, .LBB18_102
-; RV32I-NEXT: # %bb.101:
+; RV32I-NEXT: .LBB18_108:
+; RV32I-NEXT: bltu s11, t5, .LBB18_110
+; RV32I-NEXT: # %bb.109:
; RV32I-NEXT: sra a3, a4, s11
-; RV32I-NEXT: bnez s11, .LBB18_103
-; RV32I-NEXT: j .LBB18_104
-; RV32I-NEXT: .LBB18_102:
+; RV32I-NEXT: bnez s11, .LBB18_111
+; RV32I-NEXT: j .LBB18_112
+; RV32I-NEXT: .LBB18_110:
; RV32I-NEXT: srl a3, t2, ra
; RV32I-NEXT: mv s10, s4
; RV32I-NEXT: neg s4, s11
; RV32I-NEXT: sll s4, a4, s4
; RV32I-NEXT: or a3, a3, s4
; RV32I-NEXT: mv s4, s10
-; RV32I-NEXT: beqz s11, .LBB18_104
-; RV32I-NEXT: .LBB18_103:
+; RV32I-NEXT: beqz s11, .LBB18_112
+; RV32I-NEXT: .LBB18_111:
; RV32I-NEXT: mv t2, a3
-; RV32I-NEXT: .LBB18_104:
-; RV32I-NEXT: bltu s11, t5, .LBB18_106
-; RV32I-NEXT: # %bb.105:
+; RV32I-NEXT: .LBB18_112:
+; RV32I-NEXT: bltu s11, t5, .LBB18_114
+; RV32I-NEXT: # %bb.113:
; RV32I-NEXT: srai t5, a4, 31
-; RV32I-NEXT: lw s11, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: bltu ra, t0, .LBB18_107
-; RV32I-NEXT: j .LBB18_108
-; RV32I-NEXT: .LBB18_106:
+; RV32I-NEXT: j .LBB18_115
+; RV32I-NEXT: .LBB18_114:
; RV32I-NEXT: sra t5, a4, ra
+; RV32I-NEXT: .LBB18_115:
; RV32I-NEXT: lw s11, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: bgeu ra, t0, .LBB18_108
-; RV32I-NEXT: .LBB18_107:
+; RV32I-NEXT: bgeu ra, t0, .LBB18_117
+; RV32I-NEXT: # %bb.116:
; RV32I-NEXT: or t2, a6, s9
; RV32I-NEXT: or t5, s7, s3
-; RV32I-NEXT: .LBB18_108:
+; RV32I-NEXT: .LBB18_117:
; RV32I-NEXT: li a6, 128
-; RV32I-NEXT: bnez ra, .LBB18_117
-; RV32I-NEXT: # %bb.109:
-; RV32I-NEXT: bgeu ra, t0, .LBB18_118
-; RV32I-NEXT: .LBB18_110:
-; RV32I-NEXT: bgeu a5, a6, .LBB18_112
-; RV32I-NEXT: .LBB18_111:
+; RV32I-NEXT: bnez ra, .LBB18_126
+; RV32I-NEXT: # %bb.118:
+; RV32I-NEXT: bgeu ra, t0, .LBB18_127
+; RV32I-NEXT: .LBB18_119:
+; RV32I-NEXT: bgeu a5, a6, .LBB18_121
+; RV32I-NEXT: .LBB18_120:
; RV32I-NEXT: lw a3, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: or t1, a3, s8
; RV32I-NEXT: lw a3, 4(sp) # 4-byte Folded Reload
@@ -9004,23 +8951,23 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: or s1, a3, s4
; RV32I-NEXT: lw a3, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s2, a3, s6
-; RV32I-NEXT: .LBB18_112:
+; RV32I-NEXT: .LBB18_121:
; RV32I-NEXT: lw ra, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: mv t0, s5
-; RV32I-NEXT: beqz a5, .LBB18_114
-; RV32I-NEXT: # %bb.113:
+; RV32I-NEXT: beqz a5, .LBB18_123
+; RV32I-NEXT: # %bb.122:
; RV32I-NEXT: mv s0, t1
; RV32I-NEXT: mv a0, a7
; RV32I-NEXT: mv t4, s1
; RV32I-NEXT: mv t3, s2
-; RV32I-NEXT: .LBB18_114:
-; RV32I-NEXT: bltu a5, a6, .LBB18_116
-; RV32I-NEXT: # %bb.115:
+; RV32I-NEXT: .LBB18_123:
+; RV32I-NEXT: bltu a5, a6, .LBB18_125
+; RV32I-NEXT: # %bb.124:
; RV32I-NEXT: srai a1, a4, 31
; RV32I-NEXT: mv t0, a1
; RV32I-NEXT: mv s11, a1
; RV32I-NEXT: mv ra, a1
-; RV32I-NEXT: .LBB18_116:
+; RV32I-NEXT: .LBB18_125:
; RV32I-NEXT: srli a4, s0, 16
; RV32I-NEXT: lui t1, 16
; RV32I-NEXT: srli a7, s0, 24
@@ -9102,15 +9049,15 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 96
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB18_117:
+; RV32I-NEXT: .LBB18_126:
; RV32I-NEXT: mv t1, t2
; RV32I-NEXT: mv a7, t5
-; RV32I-NEXT: bltu ra, t0, .LBB18_110
-; RV32I-NEXT: .LBB18_118:
+; RV32I-NEXT: bltu ra, t0, .LBB18_119
+; RV32I-NEXT: .LBB18_127:
; RV32I-NEXT: srai s1, a4, 31
; RV32I-NEXT: mv s2, s1
-; RV32I-NEXT: bltu a5, a6, .LBB18_111
-; RV32I-NEXT: j .LBB18_112
+; RV32I-NEXT: bltu a5, a6, .LBB18_120
+; RV32I-NEXT: j .LBB18_121
%src = load i256, ptr %src.ptr, align 1
%byteOff = load i256, ptr %byteOff.ptr, align 1
%bitOff = shl i256 %byteOff, 3
@@ -9657,276 +9604,268 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: # %bb.38:
; RV32I-NEXT: li s4, 0
; RV32I-NEXT: srl a3, a0, a5
-; RV32I-NEXT: mv a6, s0
-; RV32I-NEXT: bnez a5, .LBB19_40
-; RV32I-NEXT: j .LBB19_41
+; RV32I-NEXT: j .LBB19_40
; RV32I-NEXT: .LBB19_39:
; RV32I-NEXT: srl s4, t3, a5
; RV32I-NEXT: srl a3, s0, a5
; RV32I-NEXT: sll a6, a0, s6
; RV32I-NEXT: or a3, a3, a6
-; RV32I-NEXT: mv a6, s0
-; RV32I-NEXT: beqz a5, .LBB19_41
; RV32I-NEXT: .LBB19_40:
+; RV32I-NEXT: mv a6, s0
+; RV32I-NEXT: beqz a5, .LBB19_42
+; RV32I-NEXT: # %bb.41:
; RV32I-NEXT: mv a6, a3
-; RV32I-NEXT: .LBB19_41:
-; RV32I-NEXT: bltu a5, t5, .LBB19_44
-; RV32I-NEXT: # %bb.42:
+; RV32I-NEXT: .LBB19_42:
+; RV32I-NEXT: bltu a5, t5, .LBB19_45
+; RV32I-NEXT: # %bb.43:
; RV32I-NEXT: li s1, 0
-; RV32I-NEXT: bgeu s7, t5, .LBB19_45
-; RV32I-NEXT: .LBB19_43:
+; RV32I-NEXT: bgeu s7, t5, .LBB19_46
+; RV32I-NEXT: .LBB19_44:
; RV32I-NEXT: sll s3, t4, s6
; RV32I-NEXT: srl a3, t4, s10
; RV32I-NEXT: or a3, a3, ra
-; RV32I-NEXT: mv s10, t3
-; RV32I-NEXT: bnez s7, .LBB19_46
; RV32I-NEXT: j .LBB19_47
-; RV32I-NEXT: .LBB19_44:
-; RV32I-NEXT: srl s1, a0, a5
-; RV32I-NEXT: bltu s7, t5, .LBB19_43
; RV32I-NEXT: .LBB19_45:
+; RV32I-NEXT: srl s1, a0, a5
+; RV32I-NEXT: bltu s7, t5, .LBB19_44
+; RV32I-NEXT: .LBB19_46:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t4, s7
-; RV32I-NEXT: mv s10, t3
-; RV32I-NEXT: beqz s7, .LBB19_47
-; RV32I-NEXT: .LBB19_46:
-; RV32I-NEXT: mv s10, a3
; RV32I-NEXT: .LBB19_47:
-; RV32I-NEXT: bltu s9, t5, .LBB19_49
+; RV32I-NEXT: mv s10, t3
+; RV32I-NEXT: beqz s7, .LBB19_49
; RV32I-NEXT: # %bb.48:
-; RV32I-NEXT: srl a3, t3, s9
-; RV32I-NEXT: mv s2, t4
-; RV32I-NEXT: bnez s9, .LBB19_50
-; RV32I-NEXT: j .LBB19_51
+; RV32I-NEXT: mv s10, a3
; RV32I-NEXT: .LBB19_49:
+; RV32I-NEXT: bltu s9, t5, .LBB19_51
+; RV32I-NEXT: # %bb.50:
+; RV32I-NEXT: srl a3, t3, s9
+; RV32I-NEXT: j .LBB19_52
+; RV32I-NEXT: .LBB19_51:
; RV32I-NEXT: sll a3, t3, s11
; RV32I-NEXT: or a3, s2, a3
+; RV32I-NEXT: .LBB19_52:
; RV32I-NEXT: mv s2, t4
-; RV32I-NEXT: beqz s9, .LBB19_51
-; RV32I-NEXT: .LBB19_50:
+; RV32I-NEXT: beqz s9, .LBB19_54
+; RV32I-NEXT: # %bb.53:
; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: .LBB19_51:
-; RV32I-NEXT: bltu s9, t5, .LBB19_53
-; RV32I-NEXT: # %bb.52:
+; RV32I-NEXT: .LBB19_54:
+; RV32I-NEXT: bltu s9, t5, .LBB19_56
+; RV32I-NEXT: # %bb.55:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: bltu a5, t6, .LBB19_54
-; RV32I-NEXT: j .LBB19_55
-; RV32I-NEXT: .LBB19_53:
+; RV32I-NEXT: bltu a5, t6, .LBB19_57
+; RV32I-NEXT: j .LBB19_58
+; RV32I-NEXT: .LBB19_56:
; RV32I-NEXT: srl s7, t3, a5
-; RV32I-NEXT: bgeu a5, t6, .LBB19_55
-; RV32I-NEXT: .LBB19_54:
+; RV32I-NEXT: bgeu a5, t6, .LBB19_58
+; RV32I-NEXT: .LBB19_57:
; RV32I-NEXT: or s2, a6, s3
; RV32I-NEXT: or s7, s1, s10
-; RV32I-NEXT: .LBB19_55:
+; RV32I-NEXT: .LBB19_58:
; RV32I-NEXT: li a3, 128
; RV32I-NEXT: mv a6, s0
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: beqz a5, .LBB19_57
-; RV32I-NEXT: # %bb.56:
+; RV32I-NEXT: beqz a5, .LBB19_60
+; RV32I-NEXT: # %bb.59:
; RV32I-NEXT: mv a6, s2
; RV32I-NEXT: mv s1, s7
-; RV32I-NEXT: .LBB19_57:
+; RV32I-NEXT: .LBB19_60:
; RV32I-NEXT: sw a6, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sub s2, a3, a5
-; RV32I-NEXT: bltu a5, t6, .LBB19_59
-; RV32I-NEXT: # %bb.58:
+; RV32I-NEXT: bltu a5, t6, .LBB19_62
+; RV32I-NEXT: # %bb.61:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: .LBB19_59:
+; RV32I-NEXT: .LBB19_62:
; RV32I-NEXT: neg s3, s2
; RV32I-NEXT: srl a6, t1, s3
; RV32I-NEXT: sw s4, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s2, t5, .LBB19_61
-; RV32I-NEXT: # %bb.60:
+; RV32I-NEXT: bltu s2, t5, .LBB19_64
+; RV32I-NEXT: # %bb.63:
; RV32I-NEXT: li s11, 0
; RV32I-NEXT: sll a3, t1, s2
-; RV32I-NEXT: j .LBB19_62
-; RV32I-NEXT: .LBB19_61:
+; RV32I-NEXT: j .LBB19_65
+; RV32I-NEXT: .LBB19_64:
; RV32I-NEXT: sll s11, t1, s6
; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a6, a3
-; RV32I-NEXT: .LBB19_62:
+; RV32I-NEXT: .LBB19_65:
; RV32I-NEXT: sub s1, t6, s2
; RV32I-NEXT: mv s8, a7
-; RV32I-NEXT: beqz s2, .LBB19_64
-; RV32I-NEXT: # %bb.63:
+; RV32I-NEXT: beqz s2, .LBB19_67
+; RV32I-NEXT: # %bb.66:
; RV32I-NEXT: mv s8, a3
-; RV32I-NEXT: .LBB19_64:
-; RV32I-NEXT: bltu s1, t5, .LBB19_66
-; RV32I-NEXT: # %bb.65:
+; RV32I-NEXT: .LBB19_67:
+; RV32I-NEXT: bltu s1, t5, .LBB19_69
+; RV32I-NEXT: # %bb.68:
; RV32I-NEXT: srl a3, a7, s1
-; RV32I-NEXT: mv a6, t1
-; RV32I-NEXT: bnez s1, .LBB19_67
-; RV32I-NEXT: j .LBB19_68
-; RV32I-NEXT: .LBB19_66:
+; RV32I-NEXT: j .LBB19_70
+; RV32I-NEXT: .LBB19_69:
; RV32I-NEXT: neg a3, s1
; RV32I-NEXT: sll a3, a7, a3
; RV32I-NEXT: or a3, a6, a3
+; RV32I-NEXT: .LBB19_70:
; RV32I-NEXT: mv a6, t1
-; RV32I-NEXT: beqz s1, .LBB19_68
-; RV32I-NEXT: .LBB19_67:
+; RV32I-NEXT: beqz s1, .LBB19_72
+; RV32I-NEXT: # %bb.71:
; RV32I-NEXT: mv a6, a3
-; RV32I-NEXT: .LBB19_68:
-; RV32I-NEXT: bltu s1, t5, .LBB19_71
-; RV32I-NEXT: # %bb.69:
+; RV32I-NEXT: .LBB19_72:
+; RV32I-NEXT: bltu s1, t5, .LBB19_76
+; RV32I-NEXT: # %bb.73:
; RV32I-NEXT: li s1, 0
+; RV32I-NEXT: .LBB19_74:
; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bgeu s2, t5, .LBB19_72
-; RV32I-NEXT: .LBB19_70:
+; RV32I-NEXT: bgeu s2, t5, .LBB19_77
+; RV32I-NEXT: # %bb.75:
; RV32I-NEXT: sll s6, t2, s6
; RV32I-NEXT: srl a3, t2, s3
; RV32I-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: j .LBB19_73
-; RV32I-NEXT: .LBB19_71:
+; RV32I-NEXT: j .LBB19_78
+; RV32I-NEXT: .LBB19_76:
; RV32I-NEXT: srl s1, a7, s3
-; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s2, t5, .LBB19_70
-; RV32I-NEXT: .LBB19_72:
+; RV32I-NEXT: j .LBB19_74
+; RV32I-NEXT: .LBB19_77:
; RV32I-NEXT: li s6, 0
; RV32I-NEXT: sll a3, t2, s2
-; RV32I-NEXT: .LBB19_73:
+; RV32I-NEXT: .LBB19_78:
; RV32I-NEXT: addi s9, s2, -64
; RV32I-NEXT: mv s5, a4
-; RV32I-NEXT: beqz s2, .LBB19_75
-; RV32I-NEXT: # %bb.74:
+; RV32I-NEXT: beqz s2, .LBB19_80
+; RV32I-NEXT: # %bb.79:
; RV32I-NEXT: mv s5, a3
-; RV32I-NEXT: .LBB19_75:
-; RV32I-NEXT: bltu s9, t5, .LBB19_77
-; RV32I-NEXT: # %bb.76:
+; RV32I-NEXT: .LBB19_80:
+; RV32I-NEXT: bltu s9, t5, .LBB19_82
+; RV32I-NEXT: # %bb.81:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t1, s9
-; RV32I-NEXT: mv s7, a7
-; RV32I-NEXT: bnez s9, .LBB19_78
-; RV32I-NEXT: j .LBB19_79
-; RV32I-NEXT: .LBB19_77:
+; RV32I-NEXT: j .LBB19_83
+; RV32I-NEXT: .LBB19_82:
; RV32I-NEXT: sll s3, t1, s2
; RV32I-NEXT: neg a3, s9
; RV32I-NEXT: srl a3, t1, a3
; RV32I-NEXT: sll s4, a7, s2
; RV32I-NEXT: or a3, a3, s4
+; RV32I-NEXT: .LBB19_83:
; RV32I-NEXT: mv s7, a7
-; RV32I-NEXT: beqz s9, .LBB19_79
-; RV32I-NEXT: .LBB19_78:
+; RV32I-NEXT: beqz s9, .LBB19_85
+; RV32I-NEXT: # %bb.84:
; RV32I-NEXT: mv s7, a3
-; RV32I-NEXT: .LBB19_79:
-; RV32I-NEXT: bltu s2, t6, .LBB19_81
-; RV32I-NEXT: # %bb.80:
+; RV32I-NEXT: .LBB19_85:
+; RV32I-NEXT: bltu s2, t6, .LBB19_87
+; RV32I-NEXT: # %bb.86:
; RV32I-NEXT: li s11, 0
; RV32I-NEXT: li s8, 0
-; RV32I-NEXT: j .LBB19_82
-; RV32I-NEXT: .LBB19_81:
+; RV32I-NEXT: j .LBB19_88
+; RV32I-NEXT: .LBB19_87:
; RV32I-NEXT: or s3, a6, s6
; RV32I-NEXT: or s7, s1, s5
-; RV32I-NEXT: .LBB19_82:
+; RV32I-NEXT: .LBB19_88:
; RV32I-NEXT: addi ra, a5, -128
; RV32I-NEXT: mv s4, t2
; RV32I-NEXT: mv s6, a4
-; RV32I-NEXT: beqz s2, .LBB19_84
-; RV32I-NEXT: # %bb.83:
+; RV32I-NEXT: beqz s2, .LBB19_90
+; RV32I-NEXT: # %bb.89:
; RV32I-NEXT: mv s4, s3
; RV32I-NEXT: mv s6, s7
-; RV32I-NEXT: .LBB19_84:
+; RV32I-NEXT: .LBB19_90:
; RV32I-NEXT: neg s9, ra
; RV32I-NEXT: sll s3, a4, s9
-; RV32I-NEXT: bltu ra, t5, .LBB19_86
-; RV32I-NEXT: # %bb.85:
+; RV32I-NEXT: bltu ra, t5, .LBB19_92
+; RV32I-NEXT: # %bb.91:
; RV32I-NEXT: sra a3, a4, ra
-; RV32I-NEXT: mv s1, t2
-; RV32I-NEXT: bnez ra, .LBB19_87
-; RV32I-NEXT: j .LBB19_88
-; RV32I-NEXT: .LBB19_86:
+; RV32I-NEXT: j .LBB19_93
+; RV32I-NEXT: .LBB19_92:
; RV32I-NEXT: lw a3, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
+; RV32I-NEXT: .LBB19_93:
; RV32I-NEXT: mv s1, t2
-; RV32I-NEXT: beqz ra, .LBB19_88
-; RV32I-NEXT: .LBB19_87:
+; RV32I-NEXT: beqz ra, .LBB19_95
+; RV32I-NEXT: # %bb.94:
; RV32I-NEXT: mv s1, a3
-; RV32I-NEXT: .LBB19_88:
-; RV32I-NEXT: bltu ra, t5, .LBB19_90
-; RV32I-NEXT: # %bb.89:
+; RV32I-NEXT: .LBB19_95:
+; RV32I-NEXT: bltu ra, t5, .LBB19_97
+; RV32I-NEXT: # %bb.96:
; RV32I-NEXT: srai s2, a4, 31
; RV32I-NEXT: srl a3, a7, ra
-; RV32I-NEXT: mv a6, t1
-; RV32I-NEXT: bnez ra, .LBB19_91
-; RV32I-NEXT: j .LBB19_92
-; RV32I-NEXT: .LBB19_90:
+; RV32I-NEXT: j .LBB19_98
+; RV32I-NEXT: .LBB19_97:
; RV32I-NEXT: sra s2, a4, a5
; RV32I-NEXT: sll a3, a7, s9
; RV32I-NEXT: lw a6, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a6, a3
+; RV32I-NEXT: .LBB19_98:
; RV32I-NEXT: mv a6, t1
-; RV32I-NEXT: beqz ra, .LBB19_92
-; RV32I-NEXT: .LBB19_91:
+; RV32I-NEXT: beqz ra, .LBB19_100
+; RV32I-NEXT: # %bb.99:
; RV32I-NEXT: mv a6, a3
-; RV32I-NEXT: .LBB19_92:
+; RV32I-NEXT: .LBB19_100:
; RV32I-NEXT: mv s5, t0
; RV32I-NEXT: sub s10, t6, ra
; RV32I-NEXT: li t0, 64
-; RV32I-NEXT: bltu ra, t5, .LBB19_94
-; RV32I-NEXT: # %bb.93:
+; RV32I-NEXT: bltu ra, t5, .LBB19_102
+; RV32I-NEXT: # %bb.101:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: j .LBB19_95
-; RV32I-NEXT: .LBB19_94:
+; RV32I-NEXT: j .LBB19_103
+; RV32I-NEXT: .LBB19_102:
; RV32I-NEXT: srl s7, a7, a5
-; RV32I-NEXT: .LBB19_95:
+; RV32I-NEXT: .LBB19_103:
; RV32I-NEXT: mv t6, s8
; RV32I-NEXT: mv s8, s11
-; RV32I-NEXT: bltu s10, t5, .LBB19_97
-; RV32I-NEXT: # %bb.96:
+; RV32I-NEXT: bltu s10, t5, .LBB19_105
+; RV32I-NEXT: # %bb.104:
; RV32I-NEXT: li s9, 0
; RV32I-NEXT: sll a3, t2, s10
-; RV32I-NEXT: j .LBB19_98
-; RV32I-NEXT: .LBB19_97:
+; RV32I-NEXT: j .LBB19_106
+; RV32I-NEXT: .LBB19_105:
; RV32I-NEXT: sll s9, t2, s9
; RV32I-NEXT: neg a3, s10
; RV32I-NEXT: srl a3, t2, a3
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: .LBB19_98:
+; RV32I-NEXT: .LBB19_106:
; RV32I-NEXT: addi s11, ra, -64
; RV32I-NEXT: mv s3, a4
-; RV32I-NEXT: beqz s10, .LBB19_100
-; RV32I-NEXT: # %bb.99:
+; RV32I-NEXT: beqz s10, .LBB19_108
+; RV32I-NEXT: # %bb.107:
; RV32I-NEXT: mv s3, a3
-; RV32I-NEXT: .LBB19_100:
-; RV32I-NEXT: bltu s11, t5, .LBB19_102
-; RV32I-NEXT: # %bb.101:
+; RV32I-NEXT: .LBB19_108:
+; RV32I-NEXT: bltu s11, t5, .LBB19_110
+; RV32I-NEXT: # %bb.109:
; RV32I-NEXT: sra a3, a4, s11
-; RV32I-NEXT: bnez s11, .LBB19_103
-; RV32I-NEXT: j .LBB19_104
-; RV32I-NEXT: .LBB19_102:
+; RV32I-NEXT: bnez s11, .LBB19_111
+; RV32I-NEXT: j .LBB19_112
+; RV32I-NEXT: .LBB19_110:
; RV32I-NEXT: srl a3, t2, ra
; RV32I-NEXT: mv s10, s4
; RV32I-NEXT: neg s4, s11
; RV32I-NEXT: sll s4, a4, s4
; RV32I-NEXT: or a3, a3, s4
; RV32I-NEXT: mv s4, s10
-; RV32I-NEXT: beqz s11, .LBB19_104
-; RV32I-NEXT: .LBB19_103:
+; RV32I-NEXT: beqz s11, .LBB19_112
+; RV32I-NEXT: .LBB19_111:
; RV32I-NEXT: mv t2, a3
-; RV32I-NEXT: .LBB19_104:
-; RV32I-NEXT: bltu s11, t5, .LBB19_106
-; RV32I-NEXT: # %bb.105:
+; RV32I-NEXT: .LBB19_112:
+; RV32I-NEXT: bltu s11, t5, .LBB19_114
+; RV32I-NEXT: # %bb.113:
; RV32I-NEXT: srai t5, a4, 31
-; RV32I-NEXT: lw s11, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: bltu ra, t0, .LBB19_107
-; RV32I-NEXT: j .LBB19_108
-; RV32I-NEXT: .LBB19_106:
+; RV32I-NEXT: j .LBB19_115
+; RV32I-NEXT: .LBB19_114:
; RV32I-NEXT: sra t5, a4, ra
+; RV32I-NEXT: .LBB19_115:
; RV32I-NEXT: lw s11, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: bgeu ra, t0, .LBB19_108
-; RV32I-NEXT: .LBB19_107:
+; RV32I-NEXT: bgeu ra, t0, .LBB19_117
+; RV32I-NEXT: # %bb.116:
; RV32I-NEXT: or t2, a6, s9
; RV32I-NEXT: or t5, s7, s3
-; RV32I-NEXT: .LBB19_108:
+; RV32I-NEXT: .LBB19_117:
; RV32I-NEXT: li a6, 128
-; RV32I-NEXT: bnez ra, .LBB19_117
-; RV32I-NEXT: # %bb.109:
-; RV32I-NEXT: bgeu ra, t0, .LBB19_118
-; RV32I-NEXT: .LBB19_110:
-; RV32I-NEXT: bgeu a5, a6, .LBB19_112
-; RV32I-NEXT: .LBB19_111:
+; RV32I-NEXT: bnez ra, .LBB19_126
+; RV32I-NEXT: # %bb.118:
+; RV32I-NEXT: bgeu ra, t0, .LBB19_127
+; RV32I-NEXT: .LBB19_119:
+; RV32I-NEXT: bgeu a5, a6, .LBB19_121
+; RV32I-NEXT: .LBB19_120:
; RV32I-NEXT: lw a3, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: or t1, a3, s8
; RV32I-NEXT: lw a3, 4(sp) # 4-byte Folded Reload
@@ -9935,23 +9874,23 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: or s1, a3, s4
; RV32I-NEXT: lw a3, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s2, a3, s6
-; RV32I-NEXT: .LBB19_112:
+; RV32I-NEXT: .LBB19_121:
; RV32I-NEXT: lw ra, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: mv t0, s5
-; RV32I-NEXT: beqz a5, .LBB19_114
-; RV32I-NEXT: # %bb.113:
+; RV32I-NEXT: beqz a5, .LBB19_123
+; RV32I-NEXT: # %bb.122:
; RV32I-NEXT: mv s0, t1
; RV32I-NEXT: mv a0, a7
; RV32I-NEXT: mv t4, s1
; RV32I-NEXT: mv t3, s2
-; RV32I-NEXT: .LBB19_114:
-; RV32I-NEXT: bltu a5, a6, .LBB19_116
-; RV32I-NEXT: # %bb.115:
+; RV32I-NEXT: .LBB19_123:
+; RV32I-NEXT: bltu a5, a6, .LBB19_125
+; RV32I-NEXT: # %bb.124:
; RV32I-NEXT: srai a1, a4, 31
; RV32I-NEXT: mv t0, a1
; RV32I-NEXT: mv s11, a1
; RV32I-NEXT: mv ra, a1
-; RV32I-NEXT: .LBB19_116:
+; RV32I-NEXT: .LBB19_125:
; RV32I-NEXT: srli a4, s0, 16
; RV32I-NEXT: lui t1, 16
; RV32I-NEXT: srli a7, s0, 24
@@ -10033,15 +9972,15 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 96
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB19_117:
+; RV32I-NEXT: .LBB19_126:
; RV32I-NEXT: mv t1, t2
; RV32I-NEXT: mv a7, t5
-; RV32I-NEXT: bltu ra, t0, .LBB19_110
-; RV32I-NEXT: .LBB19_118:
+; RV32I-NEXT: bltu ra, t0, .LBB19_119
+; RV32I-NEXT: .LBB19_127:
; RV32I-NEXT: srai s1, a4, 31
; RV32I-NEXT: mv s2, s1
-; RV32I-NEXT: bltu a5, a6, .LBB19_111
-; RV32I-NEXT: j .LBB19_112
+; RV32I-NEXT: bltu a5, a6, .LBB19_120
+; RV32I-NEXT: j .LBB19_121
%src = load i256, ptr %src.ptr, align 1
%wordOff = load i256, ptr %wordOff.ptr, align 1
%bitOff = shl i256 %wordOff, 5
@@ -10588,276 +10527,268 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV32I-NEXT: # %bb.38:
; RV32I-NEXT: li s4, 0
; RV32I-NEXT: srl a3, a0, a5
-; RV32I-NEXT: mv a6, s0
-; RV32I-NEXT: bnez a5, .LBB20_40
-; RV32I-NEXT: j .LBB20_41
+; RV32I-NEXT: j .LBB20_40
; RV32I-NEXT: .LBB20_39:
; RV32I-NEXT: srl s4, t3, a5
; RV32I-NEXT: srl a3, s0, a5
; RV32I-NEXT: sll a6, a0, s6
; RV32I-NEXT: or a3, a3, a6
-; RV32I-NEXT: mv a6, s0
-; RV32I-NEXT: beqz a5, .LBB20_41
; RV32I-NEXT: .LBB20_40:
+; RV32I-NEXT: mv a6, s0
+; RV32I-NEXT: beqz a5, .LBB20_42
+; RV32I-NEXT: # %bb.41:
; RV32I-NEXT: mv a6, a3
-; RV32I-NEXT: .LBB20_41:
-; RV32I-NEXT: bltu a5, t5, .LBB20_44
-; RV32I-NEXT: # %bb.42:
+; RV32I-NEXT: .LBB20_42:
+; RV32I-NEXT: bltu a5, t5, .LBB20_45
+; RV32I-NEXT: # %bb.43:
; RV32I-NEXT: li s1, 0
-; RV32I-NEXT: bgeu s7, t5, .LBB20_45
-; RV32I-NEXT: .LBB20_43:
+; RV32I-NEXT: bgeu s7, t5, .LBB20_46
+; RV32I-NEXT: .LBB20_44:
; RV32I-NEXT: sll s3, t4, s6
; RV32I-NEXT: srl a3, t4, s10
; RV32I-NEXT: or a3, a3, ra
-; RV32I-NEXT: mv s10, t3
-; RV32I-NEXT: bnez s7, .LBB20_46
; RV32I-NEXT: j .LBB20_47
-; RV32I-NEXT: .LBB20_44:
-; RV32I-NEXT: srl s1, a0, a5
-; RV32I-NEXT: bltu s7, t5, .LBB20_43
; RV32I-NEXT: .LBB20_45:
+; RV32I-NEXT: srl s1, a0, a5
+; RV32I-NEXT: bltu s7, t5, .LBB20_44
+; RV32I-NEXT: .LBB20_46:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t4, s7
-; RV32I-NEXT: mv s10, t3
-; RV32I-NEXT: beqz s7, .LBB20_47
-; RV32I-NEXT: .LBB20_46:
-; RV32I-NEXT: mv s10, a3
; RV32I-NEXT: .LBB20_47:
-; RV32I-NEXT: bltu s9, t5, .LBB20_49
+; RV32I-NEXT: mv s10, t3
+; RV32I-NEXT: beqz s7, .LBB20_49
; RV32I-NEXT: # %bb.48:
-; RV32I-NEXT: srl a3, t3, s9
-; RV32I-NEXT: mv s2, t4
-; RV32I-NEXT: bnez s9, .LBB20_50
-; RV32I-NEXT: j .LBB20_51
+; RV32I-NEXT: mv s10, a3
; RV32I-NEXT: .LBB20_49:
+; RV32I-NEXT: bltu s9, t5, .LBB20_51
+; RV32I-NEXT: # %bb.50:
+; RV32I-NEXT: srl a3, t3, s9
+; RV32I-NEXT: j .LBB20_52
+; RV32I-NEXT: .LBB20_51:
; RV32I-NEXT: sll a3, t3, s11
; RV32I-NEXT: or a3, s2, a3
+; RV32I-NEXT: .LBB20_52:
; RV32I-NEXT: mv s2, t4
-; RV32I-NEXT: beqz s9, .LBB20_51
-; RV32I-NEXT: .LBB20_50:
+; RV32I-NEXT: beqz s9, .LBB20_54
+; RV32I-NEXT: # %bb.53:
; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: .LBB20_51:
-; RV32I-NEXT: bltu s9, t5, .LBB20_53
-; RV32I-NEXT: # %bb.52:
+; RV32I-NEXT: .LBB20_54:
+; RV32I-NEXT: bltu s9, t5, .LBB20_56
+; RV32I-NEXT: # %bb.55:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: bltu a5, t6, .LBB20_54
-; RV32I-NEXT: j .LBB20_55
-; RV32I-NEXT: .LBB20_53:
+; RV32I-NEXT: bltu a5, t6, .LBB20_57
+; RV32I-NEXT: j .LBB20_58
+; RV32I-NEXT: .LBB20_56:
; RV32I-NEXT: srl s7, t3, a5
-; RV32I-NEXT: bgeu a5, t6, .LBB20_55
-; RV32I-NEXT: .LBB20_54:
+; RV32I-NEXT: bgeu a5, t6, .LBB20_58
+; RV32I-NEXT: .LBB20_57:
; RV32I-NEXT: or s2, a6, s3
; RV32I-NEXT: or s7, s1, s10
-; RV32I-NEXT: .LBB20_55:
+; RV32I-NEXT: .LBB20_58:
; RV32I-NEXT: li a3, 128
; RV32I-NEXT: mv a6, s0
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: beqz a5, .LBB20_57
-; RV32I-NEXT: # %bb.56:
+; RV32I-NEXT: beqz a5, .LBB20_60
+; RV32I-NEXT: # %bb.59:
; RV32I-NEXT: mv a6, s2
; RV32I-NEXT: mv s1, s7
-; RV32I-NEXT: .LBB20_57:
+; RV32I-NEXT: .LBB20_60:
; RV32I-NEXT: sw a6, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sub s2, a3, a5
-; RV32I-NEXT: bltu a5, t6, .LBB20_59
-; RV32I-NEXT: # %bb.58:
+; RV32I-NEXT: bltu a5, t6, .LBB20_62
+; RV32I-NEXT: # %bb.61:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: .LBB20_59:
+; RV32I-NEXT: .LBB20_62:
; RV32I-NEXT: neg s3, s2
; RV32I-NEXT: srl a6, t1, s3
; RV32I-NEXT: sw s4, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s2, t5, .LBB20_61
-; RV32I-NEXT: # %bb.60:
+; RV32I-NEXT: bltu s2, t5, .LBB20_64
+; RV32I-NEXT: # %bb.63:
; RV32I-NEXT: li s11, 0
; RV32I-NEXT: sll a3, t1, s2
-; RV32I-NEXT: j .LBB20_62
-; RV32I-NEXT: .LBB20_61:
+; RV32I-NEXT: j .LBB20_65
+; RV32I-NEXT: .LBB20_64:
; RV32I-NEXT: sll s11, t1, s6
; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a6, a3
-; RV32I-NEXT: .LBB20_62:
+; RV32I-NEXT: .LBB20_65:
; RV32I-NEXT: sub s1, t6, s2
; RV32I-NEXT: mv s8, a7
-; RV32I-NEXT: beqz s2, .LBB20_64
-; RV32I-NEXT: # %bb.63:
+; RV32I-NEXT: beqz s2, .LBB20_67
+; RV32I-NEXT: # %bb.66:
; RV32I-NEXT: mv s8, a3
-; RV32I-NEXT: .LBB20_64:
-; RV32I-NEXT: bltu s1, t5, .LBB20_66
-; RV32I-NEXT: # %bb.65:
+; RV32I-NEXT: .LBB20_67:
+; RV32I-NEXT: bltu s1, t5, .LBB20_69
+; RV32I-NEXT: # %bb.68:
; RV32I-NEXT: srl a3, a7, s1
-; RV32I-NEXT: mv a6, t1
-; RV32I-NEXT: bnez s1, .LBB20_67
-; RV32I-NEXT: j .LBB20_68
-; RV32I-NEXT: .LBB20_66:
+; RV32I-NEXT: j .LBB20_70
+; RV32I-NEXT: .LBB20_69:
; RV32I-NEXT: neg a3, s1
; RV32I-NEXT: sll a3, a7, a3
; RV32I-NEXT: or a3, a6, a3
+; RV32I-NEXT: .LBB20_70:
; RV32I-NEXT: mv a6, t1
-; RV32I-NEXT: beqz s1, .LBB20_68
-; RV32I-NEXT: .LBB20_67:
+; RV32I-NEXT: beqz s1, .LBB20_72
+; RV32I-NEXT: # %bb.71:
; RV32I-NEXT: mv a6, a3
-; RV32I-NEXT: .LBB20_68:
-; RV32I-NEXT: bltu s1, t5, .LBB20_71
-; RV32I-NEXT: # %bb.69:
+; RV32I-NEXT: .LBB20_72:
+; RV32I-NEXT: bltu s1, t5, .LBB20_76
+; RV32I-NEXT: # %bb.73:
; RV32I-NEXT: li s1, 0
+; RV32I-NEXT: .LBB20_74:
; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bgeu s2, t5, .LBB20_72
-; RV32I-NEXT: .LBB20_70:
+; RV32I-NEXT: bgeu s2, t5, .LBB20_77
+; RV32I-NEXT: # %bb.75:
; RV32I-NEXT: sll s6, t2, s6
; RV32I-NEXT: srl a3, t2, s3
; RV32I-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: j .LBB20_73
-; RV32I-NEXT: .LBB20_71:
+; RV32I-NEXT: j .LBB20_78
+; RV32I-NEXT: .LBB20_76:
; RV32I-NEXT: srl s1, a7, s3
-; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s2, t5, .LBB20_70
-; RV32I-NEXT: .LBB20_72:
+; RV32I-NEXT: j .LBB20_74
+; RV32I-NEXT: .LBB20_77:
; RV32I-NEXT: li s6, 0
; RV32I-NEXT: sll a3, t2, s2
-; RV32I-NEXT: .LBB20_73:
+; RV32I-NEXT: .LBB20_78:
; RV32I-NEXT: addi s9, s2, -64
; RV32I-NEXT: mv s5, a4
-; RV32I-NEXT: beqz s2, .LBB20_75
-; RV32I-NEXT: # %bb.74:
+; RV32I-NEXT: beqz s2, .LBB20_80
+; RV32I-NEXT: # %bb.79:
; RV32I-NEXT: mv s5, a3
-; RV32I-NEXT: .LBB20_75:
-; RV32I-NEXT: bltu s9, t5, .LBB20_77
-; RV32I-NEXT: # %bb.76:
+; RV32I-NEXT: .LBB20_80:
+; RV32I-NEXT: bltu s9, t5, .LBB20_82
+; RV32I-NEXT: # %bb.81:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t1, s9
-; RV32I-NEXT: mv s7, a7
-; RV32I-NEXT: bnez s9, .LBB20_78
-; RV32I-NEXT: j .LBB20_79
-; RV32I-NEXT: .LBB20_77:
+; RV32I-NEXT: j .LBB20_83
+; RV32I-NEXT: .LBB20_82:
; RV32I-NEXT: sll s3, t1, s2
; RV32I-NEXT: neg a3, s9
; RV32I-NEXT: srl a3, t1, a3
; RV32I-NEXT: sll s4, a7, s2
; RV32I-NEXT: or a3, a3, s4
+; RV32I-NEXT: .LBB20_83:
; RV32I-NEXT: mv s7, a7
-; RV32I-NEXT: beqz s9, .LBB20_79
-; RV32I-NEXT: .LBB20_78:
+; RV32I-NEXT: beqz s9, .LBB20_85
+; RV32I-NEXT: # %bb.84:
; RV32I-NEXT: mv s7, a3
-; RV32I-NEXT: .LBB20_79:
-; RV32I-NEXT: bltu s2, t6, .LBB20_81
-; RV32I-NEXT: # %bb.80:
+; RV32I-NEXT: .LBB20_85:
+; RV32I-NEXT: bltu s2, t6, .LBB20_87
+; RV32I-NEXT: # %bb.86:
; RV32I-NEXT: li s11, 0
; RV32I-NEXT: li s8, 0
-; RV32I-NEXT: j .LBB20_82
-; RV32I-NEXT: .LBB20_81:
+; RV32I-NEXT: j .LBB20_88
+; RV32I-NEXT: .LBB20_87:
; RV32I-NEXT: or s3, a6, s6
; RV32I-NEXT: or s7, s1, s5
-; RV32I-NEXT: .LBB20_82:
+; RV32I-NEXT: .LBB20_88:
; RV32I-NEXT: addi ra, a5, -128
; RV32I-NEXT: mv s4, t2
; RV32I-NEXT: mv s6, a4
-; RV32I-NEXT: beqz s2, .LBB20_84
-; RV32I-NEXT: # %bb.83:
+; RV32I-NEXT: beqz s2, .LBB20_90
+; RV32I-NEXT: # %bb.89:
; RV32I-NEXT: mv s4, s3
; RV32I-NEXT: mv s6, s7
-; RV32I-NEXT: .LBB20_84:
+; RV32I-NEXT: .LBB20_90:
; RV32I-NEXT: neg s9, ra
; RV32I-NEXT: sll s3, a4, s9
-; RV32I-NEXT: bltu ra, t5, .LBB20_86
-; RV32I-NEXT: # %bb.85:
+; RV32I-NEXT: bltu ra, t5, .LBB20_92
+; RV32I-NEXT: # %bb.91:
; RV32I-NEXT: sra a3, a4, ra
-; RV32I-NEXT: mv s1, t2
-; RV32I-NEXT: bnez ra, .LBB20_87
-; RV32I-NEXT: j .LBB20_88
-; RV32I-NEXT: .LBB20_86:
+; RV32I-NEXT: j .LBB20_93
+; RV32I-NEXT: .LBB20_92:
; RV32I-NEXT: lw a3, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
+; RV32I-NEXT: .LBB20_93:
; RV32I-NEXT: mv s1, t2
-; RV32I-NEXT: beqz ra, .LBB20_88
-; RV32I-NEXT: .LBB20_87:
+; RV32I-NEXT: beqz ra, .LBB20_95
+; RV32I-NEXT: # %bb.94:
; RV32I-NEXT: mv s1, a3
-; RV32I-NEXT: .LBB20_88:
-; RV32I-NEXT: bltu ra, t5, .LBB20_90
-; RV32I-NEXT: # %bb.89:
+; RV32I-NEXT: .LBB20_95:
+; RV32I-NEXT: bltu ra, t5, .LBB20_97
+; RV32I-NEXT: # %bb.96:
; RV32I-NEXT: srai s2, a4, 31
; RV32I-NEXT: srl a3, a7, ra
-; RV32I-NEXT: mv a6, t1
-; RV32I-NEXT: bnez ra, .LBB20_91
-; RV32I-NEXT: j .LBB20_92
-; RV32I-NEXT: .LBB20_90:
+; RV32I-NEXT: j .LBB20_98
+; RV32I-NEXT: .LBB20_97:
; RV32I-NEXT: sra s2, a4, a5
; RV32I-NEXT: sll a3, a7, s9
; RV32I-NEXT: lw a6, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a6, a3
+; RV32I-NEXT: .LBB20_98:
; RV32I-NEXT: mv a6, t1
-; RV32I-NEXT: beqz ra, .LBB20_92
-; RV32I-NEXT: .LBB20_91:
+; RV32I-NEXT: beqz ra, .LBB20_100
+; RV32I-NEXT: # %bb.99:
; RV32I-NEXT: mv a6, a3
-; RV32I-NEXT: .LBB20_92:
+; RV32I-NEXT: .LBB20_100:
; RV32I-NEXT: mv s5, t0
; RV32I-NEXT: sub s10, t6, ra
; RV32I-NEXT: li t0, 64
-; RV32I-NEXT: bltu ra, t5, .LBB20_94
-; RV32I-NEXT: # %bb.93:
+; RV32I-NEXT: bltu ra, t5, .LBB20_102
+; RV32I-NEXT: # %bb.101:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: j .LBB20_95
-; RV32I-NEXT: .LBB20_94:
+; RV32I-NEXT: j .LBB20_103
+; RV32I-NEXT: .LBB20_102:
; RV32I-NEXT: srl s7, a7, a5
-; RV32I-NEXT: .LBB20_95:
+; RV32I-NEXT: .LBB20_103:
; RV32I-NEXT: mv t6, s8
; RV32I-NEXT: mv s8, s11
-; RV32I-NEXT: bltu s10, t5, .LBB20_97
-; RV32I-NEXT: # %bb.96:
+; RV32I-NEXT: bltu s10, t5, .LBB20_105
+; RV32I-NEXT: # %bb.104:
; RV32I-NEXT: li s9, 0
; RV32I-NEXT: sll a3, t2, s10
-; RV32I-NEXT: j .LBB20_98
-; RV32I-NEXT: .LBB20_97:
+; RV32I-NEXT: j .LBB20_106
+; RV32I-NEXT: .LBB20_105:
; RV32I-NEXT: sll s9, t2, s9
; RV32I-NEXT: neg a3, s10
; RV32I-NEXT: srl a3, t2, a3
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: .LBB20_98:
+; RV32I-NEXT: .LBB20_106:
; RV32I-NEXT: addi s11, ra, -64
; RV32I-NEXT: mv s3, a4
-; RV32I-NEXT: beqz s10, .LBB20_100
-; RV32I-NEXT: # %bb.99:
+; RV32I-NEXT: beqz s10, .LBB20_108
+; RV32I-NEXT: # %bb.107:
; RV32I-NEXT: mv s3, a3
-; RV32I-NEXT: .LBB20_100:
-; RV32I-NEXT: bltu s11, t5, .LBB20_102
-; RV32I-NEXT: # %bb.101:
+; RV32I-NEXT: .LBB20_108:
+; RV32I-NEXT: bltu s11, t5, .LBB20_110
+; RV32I-NEXT: # %bb.109:
; RV32I-NEXT: sra a3, a4, s11
-; RV32I-NEXT: bnez s11, .LBB20_103
-; RV32I-NEXT: j .LBB20_104
-; RV32I-NEXT: .LBB20_102:
+; RV32I-NEXT: bnez s11, .LBB20_111
+; RV32I-NEXT: j .LBB20_112
+; RV32I-NEXT: .LBB20_110:
; RV32I-NEXT: srl a3, t2, ra
; RV32I-NEXT: mv s10, s4
; RV32I-NEXT: neg s4, s11
; RV32I-NEXT: sll s4, a4, s4
; RV32I-NEXT: or a3, a3, s4
; RV32I-NEXT: mv s4, s10
-; RV32I-NEXT: beqz s11, .LBB20_104
-; RV32I-NEXT: .LBB20_103:
+; RV32I-NEXT: beqz s11, .LBB20_112
+; RV32I-NEXT: .LBB20_111:
; RV32I-NEXT: mv t2, a3
-; RV32I-NEXT: .LBB20_104:
-; RV32I-NEXT: bltu s11, t5, .LBB20_106
-; RV32I-NEXT: # %bb.105:
+; RV32I-NEXT: .LBB20_112:
+; RV32I-NEXT: bltu s11, t5, .LBB20_114
+; RV32I-NEXT: # %bb.113:
; RV32I-NEXT: srai t5, a4, 31
-; RV32I-NEXT: lw s11, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: bltu ra, t0, .LBB20_107
-; RV32I-NEXT: j .LBB20_108
-; RV32I-NEXT: .LBB20_106:
+; RV32I-NEXT: j .LBB20_115
+; RV32I-NEXT: .LBB20_114:
; RV32I-NEXT: sra t5, a4, ra
+; RV32I-NEXT: .LBB20_115:
; RV32I-NEXT: lw s11, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: bgeu ra, t0, .LBB20_108
-; RV32I-NEXT: .LBB20_107:
+; RV32I-NEXT: bgeu ra, t0, .LBB20_117
+; RV32I-NEXT: # %bb.116:
; RV32I-NEXT: or t2, a6, s9
; RV32I-NEXT: or t5, s7, s3
-; RV32I-NEXT: .LBB20_108:
+; RV32I-NEXT: .LBB20_117:
; RV32I-NEXT: li a6, 128
-; RV32I-NEXT: bnez ra, .LBB20_117
-; RV32I-NEXT: # %bb.109:
-; RV32I-NEXT: bgeu ra, t0, .LBB20_118
-; RV32I-NEXT: .LBB20_110:
-; RV32I-NEXT: bgeu a5, a6, .LBB20_112
-; RV32I-NEXT: .LBB20_111:
+; RV32I-NEXT: bnez ra, .LBB20_126
+; RV32I-NEXT: # %bb.118:
+; RV32I-NEXT: bgeu ra, t0, .LBB20_127
+; RV32I-NEXT: .LBB20_119:
+; RV32I-NEXT: bgeu a5, a6, .LBB20_121
+; RV32I-NEXT: .LBB20_120:
; RV32I-NEXT: lw a3, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: or t1, a3, s8
; RV32I-NEXT: lw a3, 4(sp) # 4-byte Folded Reload
@@ -10866,23 +10797,23 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV32I-NEXT: or s1, a3, s4
; RV32I-NEXT: lw a3, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s2, a3, s6
-; RV32I-NEXT: .LBB20_112:
+; RV32I-NEXT: .LBB20_121:
; RV32I-NEXT: lw ra, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: mv t0, s5
-; RV32I-NEXT: beqz a5, .LBB20_114
-; RV32I-NEXT: # %bb.113:
+; RV32I-NEXT: beqz a5, .LBB20_123
+; RV32I-NEXT: # %bb.122:
; RV32I-NEXT: mv s0, t1
; RV32I-NEXT: mv a0, a7
; RV32I-NEXT: mv t4, s1
; RV32I-NEXT: mv t3, s2
-; RV32I-NEXT: .LBB20_114:
-; RV32I-NEXT: bltu a5, a6, .LBB20_116
-; RV32I-NEXT: # %bb.115:
+; RV32I-NEXT: .LBB20_123:
+; RV32I-NEXT: bltu a5, a6, .LBB20_125
+; RV32I-NEXT: # %bb.124:
; RV32I-NEXT: srai a1, a4, 31
; RV32I-NEXT: mv t0, a1
; RV32I-NEXT: mv s11, a1
; RV32I-NEXT: mv ra, a1
-; RV32I-NEXT: .LBB20_116:
+; RV32I-NEXT: .LBB20_125:
; RV32I-NEXT: srli a4, s0, 16
; RV32I-NEXT: lui t1, 16
; RV32I-NEXT: srli a7, s0, 24
@@ -10964,15 +10895,15 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 96
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB20_117:
+; RV32I-NEXT: .LBB20_126:
; RV32I-NEXT: mv t1, t2
; RV32I-NEXT: mv a7, t5
-; RV32I-NEXT: bltu ra, t0, .LBB20_110
-; RV32I-NEXT: .LBB20_118:
+; RV32I-NEXT: bltu ra, t0, .LBB20_119
+; RV32I-NEXT: .LBB20_127:
; RV32I-NEXT: srai s1, a4, 31
; RV32I-NEXT: mv s2, s1
-; RV32I-NEXT: bltu a5, a6, .LBB20_111
-; RV32I-NEXT: j .LBB20_112
+; RV32I-NEXT: bltu a5, a6, .LBB20_120
+; RV32I-NEXT: j .LBB20_121
%src = load i256, ptr %src.ptr, align 1
%dwordOff = load i256, ptr %dwordOff.ptr, align 1
%bitOff = shl i256 %dwordOff, 6
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index 694662eab1681..71431c452233f 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 -O0 -debug-pass=Structure < %s -o /dev/null 2>&1 | \
; RUN: grep -v "Verify generated machine code" | \
; RUN: FileCheck %s --check-prefixes=CHECK
@@ -22,7 +23,7 @@
; CHECK-NEXT: Expand large div/rem
; CHECK-NEXT: Expand fp
; CHECK-NEXT: Expand Atomic instructions
-; CHECK-NEXT: RISC-V Zacas ABI fix
+; CHECK-NEXT: RISC-V Zacas ABI fix
; CHECK-NEXT: Module Verifier
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
@@ -62,6 +63,10 @@
; CHECK-NEXT: Insert fentry calls
; CHECK-NEXT: Insert XRay ops
; CHECK-NEXT: Implement the 'patchable-function' attribute
+; CHECK-NEXT: MachineDominator Tree Construction
+; CHECK-NEXT: Machine Natural Loop Construction
+; CHECK-NEXT: Machine Block Frequency Analysis
+; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: RISC-V Make Compressible
; CHECK-NEXT: Contiguously Lay Out Funclets
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 19de864422bc5..5b34e9defcdb8 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 -O3 -debug-pass=Structure < %s -o /dev/null 2>&1 | \
; RUN: grep -v "Verify generated machine code" | \
; RUN: FileCheck %s --check-prefixes=CHECK
@@ -26,7 +27,7 @@
; CHECK-NEXT: Expand large div/rem
; CHECK-NEXT: Expand fp
; CHECK-NEXT: Expand Atomic instructions
-; CHECK-NEXT: RISC-V Zacas ABI fix
+; CHECK-NEXT: RISC-V Zacas ABI fix
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Canonicalize natural loops
@@ -195,6 +196,10 @@
; CHECK-NEXT: Implement the 'patchable-function' attribute
; CHECK-NEXT: Machine Copy Propagation Pass
; CHECK-NEXT: RISC-V Late Branch Optimisation Pass
+; CHECK-NEXT: MachineDominator Tree Construction
+; CHECK-NEXT: Machine Natural Loop Construction
+; CHECK-NEXT: Machine Block Frequency Analysis
+; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: RISC-V Make Compressible
; CHECK-NEXT: Contiguously Lay Out Funclets
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll b/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll
index 8534ad379ebab..d3c6a1322ebbe 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll
@@ -208,28 +208,26 @@ define void @amomax_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: call __atomic_compare_exchange_8
; RV32-NEXT: lw a4, 8(sp)
; RV32-NEXT: lw a5, 12(sp)
-; RV32-NEXT: bnez a0, .LBB11_6
+; RV32-NEXT: bnez a0, .LBB11_7
; RV32-NEXT: .LBB11_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beq a5, s0, .LBB11_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB11_2 Depth=1
; RV32-NEXT: slt a0, s0, a5
-; RV32-NEXT: mv a2, a4
-; RV32-NEXT: mv a3, a5
-; RV32-NEXT: bnez a0, .LBB11_1
; RV32-NEXT: j .LBB11_5
; RV32-NEXT: .LBB11_4: # in Loop: Header=BB11_2 Depth=1
; RV32-NEXT: sltu a0, s2, a4
+; RV32-NEXT: .LBB11_5: # in Loop: Header=BB11_2 Depth=1
; RV32-NEXT: mv a2, a4
; RV32-NEXT: mv a3, a5
; RV32-NEXT: bnez a0, .LBB11_1
-; RV32-NEXT: .LBB11_5: # %atomicrmw.start
+; RV32-NEXT: # %bb.6: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB11_2 Depth=1
; RV32-NEXT: mv a2, s2
; RV32-NEXT: mv a3, s0
; RV32-NEXT: j .LBB11_1
-; RV32-NEXT: .LBB11_6: # %atomicrmw.end
+; RV32-NEXT: .LBB11_7: # %atomicrmw.end
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -284,28 +282,26 @@ define void @amomaxu_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: call __atomic_compare_exchange_8
; RV32-NEXT: lw a4, 8(sp)
; RV32-NEXT: lw a5, 12(sp)
-; RV32-NEXT: bnez a0, .LBB13_6
+; RV32-NEXT: bnez a0, .LBB13_7
; RV32-NEXT: .LBB13_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beq a5, s0, .LBB13_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB13_2 Depth=1
; RV32-NEXT: sltu a0, s0, a5
-; RV32-NEXT: mv a2, a4
-; RV32-NEXT: mv a3, a5
-; RV32-NEXT: bnez a0, .LBB13_1
; RV32-NEXT: j .LBB13_5
; RV32-NEXT: .LBB13_4: # in Loop: Header=BB13_2 Depth=1
; RV32-NEXT: sltu a0, s2, a4
+; RV32-NEXT: .LBB13_5: # in Loop: Header=BB13_2 Depth=1
; RV32-NEXT: mv a2, a4
; RV32-NEXT: mv a3, a5
; RV32-NEXT: bnez a0, .LBB13_1
-; RV32-NEXT: .LBB13_5: # %atomicrmw.start
+; RV32-NEXT: # %bb.6: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB13_2 Depth=1
; RV32-NEXT: mv a2, s2
; RV32-NEXT: mv a3, s0
; RV32-NEXT: j .LBB13_1
-; RV32-NEXT: .LBB13_6: # %atomicrmw.end
+; RV32-NEXT: .LBB13_7: # %atomicrmw.end
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -360,28 +356,26 @@ define void @amomin_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: call __atomic_compare_exchange_8
; RV32-NEXT: lw a4, 8(sp)
; RV32-NEXT: lw a5, 12(sp)
-; RV32-NEXT: bnez a0, .LBB15_6
+; RV32-NEXT: bnez a0, .LBB15_7
; RV32-NEXT: .LBB15_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beq a5, s0, .LBB15_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB15_2 Depth=1
; RV32-NEXT: slt a0, s0, a5
-; RV32-NEXT: mv a2, a4
-; RV32-NEXT: mv a3, a5
-; RV32-NEXT: beqz a0, .LBB15_1
; RV32-NEXT: j .LBB15_5
; RV32-NEXT: .LBB15_4: # in Loop: Header=BB15_2 Depth=1
; RV32-NEXT: sltu a0, s2, a4
+; RV32-NEXT: .LBB15_5: # in Loop: Header=BB15_2 Depth=1
; RV32-NEXT: mv a2, a4
; RV32-NEXT: mv a3, a5
; RV32-NEXT: beqz a0, .LBB15_1
-; RV32-NEXT: .LBB15_5: # %atomicrmw.start
+; RV32-NEXT: # %bb.6: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB15_2 Depth=1
; RV32-NEXT: mv a2, s2
; RV32-NEXT: mv a3, s0
; RV32-NEXT: j .LBB15_1
-; RV32-NEXT: .LBB15_6: # %atomicrmw.end
+; RV32-NEXT: .LBB15_7: # %atomicrmw.end
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -436,28 +430,26 @@ define void @amominu_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: call __atomic_compare_exchange_8
; RV32-NEXT: lw a4, 8(sp)
; RV32-NEXT: lw a5, 12(sp)
-; RV32-NEXT: bnez a0, .LBB17_6
+; RV32-NEXT: bnez a0, .LBB17_7
; RV32-NEXT: .LBB17_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beq a5, s0, .LBB17_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB17_2 Depth=1
; RV32-NEXT: sltu a0, s0, a5
-; RV32-NEXT: mv a2, a4
-; RV32-NEXT: mv a3, a5
-; RV32-NEXT: beqz a0, .LBB17_1
; RV32-NEXT: j .LBB17_5
; RV32-NEXT: .LBB17_4: # in Loop: Header=BB17_2 Depth=1
; RV32-NEXT: sltu a0, s2, a4
+; RV32-NEXT: .LBB17_5: # in Loop: Header=BB17_2 Depth=1
; RV32-NEXT: mv a2, a4
; RV32-NEXT: mv a3, a5
; RV32-NEXT: beqz a0, .LBB17_1
-; RV32-NEXT: .LBB17_5: # %atomicrmw.start
+; RV32-NEXT: # %bb.6: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB17_2 Depth=1
; RV32-NEXT: mv a2, s2
; RV32-NEXT: mv a3, s0
; RV32-NEXT: j .LBB17_1
-; RV32-NEXT: .LBB17_6: # %atomicrmw.end
+; RV32-NEXT: .LBB17_7: # %atomicrmw.end
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll
index aea7473ceece4..6ac265ca4a988 100644
--- a/llvm/test/CodeGen/RISCV/atomic-signext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll
@@ -4992,20 +4992,20 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a2, a1, 1
; RV32IA-NEXT: mv a1, a0
-; RV32IA-NEXT: beqz a2, .LBB60_2
-; RV32IA-NEXT: # %bb.1: # %then
-; RV32IA-NEXT: li a0, 1
-; RV32IA-NEXT: amomax.w a0, a0, (a1)
-; RV32IA-NEXT: ret
-; RV32IA-NEXT: .LBB60_2: # %else
+; RV32IA-NEXT: bnez a2, .LBB60_4
+; RV32IA-NEXT: # %bb.1: # %else
; RV32IA-NEXT: lw a0, 0(a1)
; RV32IA-NEXT: mv a2, a0
-; RV32IA-NEXT: bgtz a0, .LBB60_4
-; RV32IA-NEXT: # %bb.3: # %else
+; RV32IA-NEXT: bgtz a0, .LBB60_3
+; RV32IA-NEXT: # %bb.2: # %else
; RV32IA-NEXT: li a2, 1
-; RV32IA-NEXT: .LBB60_4: # %else
+; RV32IA-NEXT: .LBB60_3: # %else
; RV32IA-NEXT: sw a2, 0(a1)
; RV32IA-NEXT: ret
+; RV32IA-NEXT: .LBB60_4: # %then
+; RV32IA-NEXT: li a0, 1
+; RV32IA-NEXT: amomax.w a0, a0, (a1)
+; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i32_monotonic_crossbb:
; RV64I: # %bb.0:
@@ -5056,19 +5056,19 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64IA: # %bb.0:
; RV64IA-NEXT: andi a2, a1, 1
; RV64IA-NEXT: mv a1, a0
-; RV64IA-NEXT: beqz a2, .LBB60_2
-; RV64IA-NEXT: # %bb.1: # %then
-; RV64IA-NEXT: li a0, 1
-; RV64IA-NEXT: amomax.w a0, a0, (a1)
-; RV64IA-NEXT: ret
-; RV64IA-NEXT: .LBB60_2: # %else
+; RV64IA-NEXT: bnez a2, .LBB60_4
+; RV64IA-NEXT: # %bb.1: # %else
; RV64IA-NEXT: lw a0, 0(a1)
; RV64IA-NEXT: mv a2, a0
-; RV64IA-NEXT: bgtz a0, .LBB60_4
-; RV64IA-NEXT: # %bb.3: # %else
+; RV64IA-NEXT: bgtz a0, .LBB60_3
+; RV64IA-NEXT: # %bb.2: # %else
; RV64IA-NEXT: li a2, 1
-; RV64IA-NEXT: .LBB60_4: # %else
+; RV64IA-NEXT: .LBB60_3: # %else
; RV64IA-NEXT: sw a2, 0(a1)
+; RV64IA-NEXT: ret
+; RV64IA-NEXT: .LBB60_4: # %then
+; RV64IA-NEXT: li a0, 1
+; RV64IA-NEXT: amomax.w a0, a0, (a1)
; RV64IA-NEXT: ret
br i1 %c, label %then, label %else
@@ -5140,20 +5140,20 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a2, a1, 1
; RV32IA-NEXT: mv a1, a0
-; RV32IA-NEXT: beqz a2, .LBB61_2
-; RV32IA-NEXT: # %bb.1: # %then
-; RV32IA-NEXT: li a0, 1
-; RV32IA-NEXT: amomin.w a0, a0, (a1)
-; RV32IA-NEXT: ret
-; RV32IA-NEXT: .LBB61_2: # %else
+; RV32IA-NEXT: bnez a2, .LBB61_4
+; RV32IA-NEXT: # %bb.1: # %else
; RV32IA-NEXT: lw a0, 0(a1)
; RV32IA-NEXT: mv a2, a0
-; RV32IA-NEXT: blez a0, .LBB61_4
-; RV32IA-NEXT: # %bb.3: # %else
+; RV32IA-NEXT: blez a0, .LBB61_3
+; RV32IA-NEXT: # %bb.2: # %else
; RV32IA-NEXT: li a2, 1
-; RV32IA-NEXT: .LBB61_4: # %else
+; RV32IA-NEXT: .LBB61_3: # %else
; RV32IA-NEXT: sw a2, 0(a1)
; RV32IA-NEXT: ret
+; RV32IA-NEXT: .LBB61_4: # %then
+; RV32IA-NEXT: li a0, 1
+; RV32IA-NEXT: amomin.w a0, a0, (a1)
+; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i32_monotonic_crossbb:
; RV64I: # %bb.0:
@@ -5206,19 +5206,19 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64IA: # %bb.0:
; RV64IA-NEXT: andi a2, a1, 1
; RV64IA-NEXT: mv a1, a0
-; RV64IA-NEXT: beqz a2, .LBB61_2
-; RV64IA-NEXT: # %bb.1: # %then
-; RV64IA-NEXT: li a0, 1
-; RV64IA-NEXT: amomin.w a0, a0, (a1)
-; RV64IA-NEXT: ret
-; RV64IA-NEXT: .LBB61_2: # %else
+; RV64IA-NEXT: bnez a2, .LBB61_4
+; RV64IA-NEXT: # %bb.1: # %else
; RV64IA-NEXT: lw a0, 0(a1)
; RV64IA-NEXT: mv a2, a0
-; RV64IA-NEXT: blez a0, .LBB61_4
-; RV64IA-NEXT: # %bb.3: # %else
+; RV64IA-NEXT: blez a0, .LBB61_3
+; RV64IA-NEXT: # %bb.2: # %else
; RV64IA-NEXT: li a2, 1
-; RV64IA-NEXT: .LBB61_4: # %else
+; RV64IA-NEXT: .LBB61_3: # %else
; RV64IA-NEXT: sw a2, 0(a1)
+; RV64IA-NEXT: ret
+; RV64IA-NEXT: .LBB61_4: # %then
+; RV64IA-NEXT: li a0, 1
+; RV64IA-NEXT: amomin.w a0, a0, (a1)
; RV64IA-NEXT: ret
br i1 %c, label %then, label %else
@@ -5418,21 +5418,21 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a2, a1, 1
; RV32IA-NEXT: mv a1, a0
-; RV32IA-NEXT: beqz a2, .LBB63_2
-; RV32IA-NEXT: # %bb.1: # %then
-; RV32IA-NEXT: li a0, 1
-; RV32IA-NEXT: amominu.w a0, a0, (a1)
-; RV32IA-NEXT: ret
-; RV32IA-NEXT: .LBB63_2: # %else
+; RV32IA-NEXT: bnez a2, .LBB63_4
+; RV32IA-NEXT: # %bb.1: # %else
; RV32IA-NEXT: lw a0, 0(a1)
; RV32IA-NEXT: li a3, 1
; RV32IA-NEXT: mv a2, a0
-; RV32IA-NEXT: bltu a0, a3, .LBB63_4
-; RV32IA-NEXT: # %bb.3: # %else
+; RV32IA-NEXT: bltu a0, a3, .LBB63_3
+; RV32IA-NEXT: # %bb.2: # %else
; RV32IA-NEXT: li a2, 1
-; RV32IA-NEXT: .LBB63_4: # %else
+; RV32IA-NEXT: .LBB63_3: # %else
; RV32IA-NEXT: sw a2, 0(a1)
; RV32IA-NEXT: ret
+; RV32IA-NEXT: .LBB63_4: # %then
+; RV32IA-NEXT: li a0, 1
+; RV32IA-NEXT: amominu.w a0, a0, (a1)
+; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i32_monotonic_crossbb:
; RV64I: # %bb.0:
@@ -5486,20 +5486,20 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64IA: # %bb.0:
; RV64IA-NEXT: andi a2, a1, 1
; RV64IA-NEXT: mv a1, a0
-; RV64IA-NEXT: beqz a2, .LBB63_2
-; RV64IA-NEXT: # %bb.1: # %then
-; RV64IA-NEXT: li a0, 1
-; RV64IA-NEXT: amominu.w a0, a0, (a1)
-; RV64IA-NEXT: ret
-; RV64IA-NEXT: .LBB63_2: # %else
+; RV64IA-NEXT: bnez a2, .LBB63_4
+; RV64IA-NEXT: # %bb.1: # %else
; RV64IA-NEXT: lw a0, 0(a1)
; RV64IA-NEXT: li a3, 1
; RV64IA-NEXT: mv a2, a0
-; RV64IA-NEXT: bltu a0, a3, .LBB63_4
-; RV64IA-NEXT: # %bb.3: # %else
+; RV64IA-NEXT: bltu a0, a3, .LBB63_3
+; RV64IA-NEXT: # %bb.2: # %else
; RV64IA-NEXT: li a2, 1
-; RV64IA-NEXT: .LBB63_4: # %else
+; RV64IA-NEXT: .LBB63_3: # %else
; RV64IA-NEXT: sw a2, 0(a1)
+; RV64IA-NEXT: ret
+; RV64IA-NEXT: .LBB63_4: # %then
+; RV64IA-NEXT: li a0, 1
+; RV64IA-NEXT: amominu.w a0, a0, (a1)
; RV64IA-NEXT: ret
br i1 %c, label %then, label %else
diff --git a/llvm/test/CodeGen/RISCV/bfloat-br-fcmp.ll b/llvm/test/CodeGen/RISCV/bfloat-br-fcmp.ll
index b2558cde29832..41049195360fc 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-br-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-br-fcmp.ll
@@ -11,20 +11,12 @@ declare bfloat @dummy(bfloat)
define void @br_fcmp_false(bfloat %a, bfloat %b) nounwind {
; RV32IZFBFMIN-LABEL: br_fcmp_false:
; RV32IZFBFMIN: # %bb.0:
-; RV32IZFBFMIN-NEXT: j .LBB0_2
-; RV32IZFBFMIN-NEXT: # %bb.1: # %if.then
-; RV32IZFBFMIN-NEXT: ret
-; RV32IZFBFMIN-NEXT: .LBB0_2: # %if.else
; RV32IZFBFMIN-NEXT: addi sp, sp, -16
; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: call abort
;
; RV64IZFBFMIN-LABEL: br_fcmp_false:
; RV64IZFBFMIN: # %bb.0:
-; RV64IZFBFMIN-NEXT: j .LBB0_2
-; RV64IZFBFMIN-NEXT: # %bb.1: # %if.then
-; RV64IZFBFMIN-NEXT: ret
-; RV64IZFBFMIN-NEXT: .LBB0_2: # %if.else
; RV64IZFBFMIN-NEXT: addi sp, sp, -16
; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFBFMIN-NEXT: call abort
@@ -581,20 +573,12 @@ if.then:
define void @br_fcmp_true(bfloat %a, bfloat %b) nounwind {
; RV32IZFBFMIN-LABEL: br_fcmp_true:
; RV32IZFBFMIN: # %bb.0:
-; RV32IZFBFMIN-NEXT: j .LBB16_2
-; RV32IZFBFMIN-NEXT: # %bb.1: # %if.else
-; RV32IZFBFMIN-NEXT: ret
-; RV32IZFBFMIN-NEXT: .LBB16_2: # %if.then
; RV32IZFBFMIN-NEXT: addi sp, sp, -16
; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: call abort
;
; RV64IZFBFMIN-LABEL: br_fcmp_true:
; RV64IZFBFMIN: # %bb.0:
-; RV64IZFBFMIN-NEXT: j .LBB16_2
-; RV64IZFBFMIN-NEXT: # %bb.1: # %if.else
-; RV64IZFBFMIN-NEXT: ret
-; RV64IZFBFMIN-NEXT: .LBB16_2: # %if.then
; RV64IZFBFMIN-NEXT: addi sp, sp, -16
; RV64IZFBFMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFBFMIN-NEXT: call abort
diff --git a/llvm/test/CodeGen/RISCV/bittest.ll b/llvm/test/CodeGen/RISCV/bittest.ll
index d69ab0550a034..ccc9d30f2ef1b 100644
--- a/llvm/test/CodeGen/RISCV/bittest.ll
+++ b/llvm/test/CodeGen/RISCV/bittest.ll
@@ -444,93 +444,93 @@ define void @bittest_switch(i32 signext %0) {
; RV32I-LABEL: bittest_switch:
; RV32I: # %bb.0:
; RV32I-NEXT: li a1, 31
-; RV32I-NEXT: bltu a1, a0, .LBB14_3
+; RV32I-NEXT: bltu a1, a0, .LBB14_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: lui a1, 524291
; RV32I-NEXT: addi a1, a1, 768
; RV32I-NEXT: srl a0, a1, a0
; RV32I-NEXT: andi a0, a0, 1
-; RV32I-NEXT: beqz a0, .LBB14_3
-; RV32I-NEXT: # %bb.2:
-; RV32I-NEXT: tail bar
-; RV32I-NEXT: .LBB14_3:
+; RV32I-NEXT: bnez a0, .LBB14_3
+; RV32I-NEXT: .LBB14_2:
; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB14_3:
+; RV32I-NEXT: tail bar
;
; RV64I-LABEL: bittest_switch:
; RV64I: # %bb.0:
; RV64I-NEXT: li a1, 31
-; RV64I-NEXT: bltu a1, a0, .LBB14_3
+; RV64I-NEXT: bltu a1, a0, .LBB14_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: lui a1, 2048
; RV64I-NEXT: addiw a1, a1, 51
; RV64I-NEXT: slli a1, a1, 8
; RV64I-NEXT: srl a0, a1, a0
; RV64I-NEXT: andi a0, a0, 1
-; RV64I-NEXT: beqz a0, .LBB14_3
-; RV64I-NEXT: # %bb.2:
-; RV64I-NEXT: tail bar
-; RV64I-NEXT: .LBB14_3:
+; RV64I-NEXT: bnez a0, .LBB14_3
+; RV64I-NEXT: .LBB14_2:
; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB14_3:
+; RV64I-NEXT: tail bar
;
; RV32ZBS-LABEL: bittest_switch:
; RV32ZBS: # %bb.0:
; RV32ZBS-NEXT: li a1, 31
-; RV32ZBS-NEXT: bltu a1, a0, .LBB14_3
+; RV32ZBS-NEXT: bltu a1, a0, .LBB14_2
; RV32ZBS-NEXT: # %bb.1:
; RV32ZBS-NEXT: lui a1, 524291
; RV32ZBS-NEXT: addi a1, a1, 768
; RV32ZBS-NEXT: bext a0, a1, a0
-; RV32ZBS-NEXT: beqz a0, .LBB14_3
-; RV32ZBS-NEXT: # %bb.2:
-; RV32ZBS-NEXT: tail bar
-; RV32ZBS-NEXT: .LBB14_3:
+; RV32ZBS-NEXT: bnez a0, .LBB14_3
+; RV32ZBS-NEXT: .LBB14_2:
; RV32ZBS-NEXT: ret
+; RV32ZBS-NEXT: .LBB14_3:
+; RV32ZBS-NEXT: tail bar
;
; RV64ZBS-LABEL: bittest_switch:
; RV64ZBS: # %bb.0:
; RV64ZBS-NEXT: li a1, 31
-; RV64ZBS-NEXT: bltu a1, a0, .LBB14_3
+; RV64ZBS-NEXT: bltu a1, a0, .LBB14_2
; RV64ZBS-NEXT: # %bb.1:
; RV64ZBS-NEXT: lui a1, 2048
; RV64ZBS-NEXT: addiw a1, a1, 51
; RV64ZBS-NEXT: slli a1, a1, 8
; RV64ZBS-NEXT: bext a0, a1, a0
-; RV64ZBS-NEXT: beqz a0, .LBB14_3
-; RV64ZBS-NEXT: # %bb.2:
-; RV64ZBS-NEXT: tail bar
-; RV64ZBS-NEXT: .LBB14_3:
+; RV64ZBS-NEXT: bnez a0, .LBB14_3
+; RV64ZBS-NEXT: .LBB14_2:
; RV64ZBS-NEXT: ret
+; RV64ZBS-NEXT: .LBB14_3:
+; RV64ZBS-NEXT: tail bar
;
; RV32XTHEADBS-LABEL: bittest_switch:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: li a1, 31
-; RV32XTHEADBS-NEXT: bltu a1, a0, .LBB14_3
+; RV32XTHEADBS-NEXT: bltu a1, a0, .LBB14_2
; RV32XTHEADBS-NEXT: # %bb.1:
; RV32XTHEADBS-NEXT: lui a1, 524291
; RV32XTHEADBS-NEXT: addi a1, a1, 768
; RV32XTHEADBS-NEXT: srl a0, a1, a0
; RV32XTHEADBS-NEXT: andi a0, a0, 1
-; RV32XTHEADBS-NEXT: beqz a0, .LBB14_3
-; RV32XTHEADBS-NEXT: # %bb.2:
-; RV32XTHEADBS-NEXT: tail bar
-; RV32XTHEADBS-NEXT: .LBB14_3:
+; RV32XTHEADBS-NEXT: bnez a0, .LBB14_3
+; RV32XTHEADBS-NEXT: .LBB14_2:
; RV32XTHEADBS-NEXT: ret
+; RV32XTHEADBS-NEXT: .LBB14_3:
+; RV32XTHEADBS-NEXT: tail bar
;
; RV64XTHEADBS-LABEL: bittest_switch:
; RV64XTHEADBS: # %bb.0:
; RV64XTHEADBS-NEXT: li a1, 31
-; RV64XTHEADBS-NEXT: bltu a1, a0, .LBB14_3
+; RV64XTHEADBS-NEXT: bltu a1, a0, .LBB14_2
; RV64XTHEADBS-NEXT: # %bb.1:
; RV64XTHEADBS-NEXT: lui a1, 2048
; RV64XTHEADBS-NEXT: addiw a1, a1, 51
; RV64XTHEADBS-NEXT: slli a1, a1, 8
; RV64XTHEADBS-NEXT: srl a0, a1, a0
; RV64XTHEADBS-NEXT: andi a0, a0, 1
-; RV64XTHEADBS-NEXT: beqz a0, .LBB14_3
-; RV64XTHEADBS-NEXT: # %bb.2:
-; RV64XTHEADBS-NEXT: tail bar
-; RV64XTHEADBS-NEXT: .LBB14_3:
+; RV64XTHEADBS-NEXT: bnez a0, .LBB14_3
+; RV64XTHEADBS-NEXT: .LBB14_2:
; RV64XTHEADBS-NEXT: ret
+; RV64XTHEADBS-NEXT: .LBB14_3:
+; RV64XTHEADBS-NEXT: tail bar
switch i32 %0, label %3 [
i32 8, label %2
i32 9, label %2
@@ -1241,11 +1241,11 @@ define void @bit_10_z_branch_i32(i32 signext %0) {
; CHECK-LABEL: bit_10_z_branch_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 1024
-; CHECK-NEXT: bnez a0, .LBB37_2
+; CHECK-NEXT: beqz a0, .LBB37_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: tail bar
-; CHECK-NEXT: .LBB37_2:
; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB37_2:
+; CHECK-NEXT: tail bar
%2 = and i32 %0, 1024
%3 = icmp eq i32 %2, 0
br i1 %3, label %4, label %5
@@ -1262,11 +1262,11 @@ define void @bit_10_nz_branch_i32(i32 signext %0) {
; CHECK-LABEL: bit_10_nz_branch_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 1024
-; CHECK-NEXT: beqz a0, .LBB38_2
+; CHECK-NEXT: bnez a0, .LBB38_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: tail bar
-; CHECK-NEXT: .LBB38_2:
; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB38_2:
+; CHECK-NEXT: tail bar
%2 = and i32 %0, 1024
%3 = icmp ne i32 %2, 0
br i1 %3, label %4, label %5
@@ -1283,20 +1283,20 @@ define void @bit_11_z_branch_i32(i32 signext %0) {
; RV32-LABEL: bit_11_z_branch_i32:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 20
-; RV32-NEXT: bltz a0, .LBB39_2
+; RV32-NEXT: bgez a0, .LBB39_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB39_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB39_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_11_z_branch_i32:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 52
-; RV64-NEXT: bltz a0, .LBB39_2
+; RV64-NEXT: bgez a0, .LBB39_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB39_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB39_2:
+; RV64-NEXT: tail bar
%2 = and i32 %0, 2048
%3 = icmp eq i32 %2, 0
br i1 %3, label %4, label %5
@@ -1313,20 +1313,20 @@ define void @bit_11_nz_branch_i32(i32 signext %0) {
; RV32-LABEL: bit_11_nz_branch_i32:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 20
-; RV32-NEXT: bgez a0, .LBB40_2
+; RV32-NEXT: bltz a0, .LBB40_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB40_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB40_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_11_nz_branch_i32:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 52
-; RV64-NEXT: bgez a0, .LBB40_2
+; RV64-NEXT: bltz a0, .LBB40_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB40_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB40_2:
+; RV64-NEXT: tail bar
%2 = and i32 %0, 2048
%3 = icmp ne i32 %2, 0
br i1 %3, label %4, label %5
@@ -1343,20 +1343,20 @@ define void @bit_24_z_branch_i32(i32 signext %0) {
; RV32-LABEL: bit_24_z_branch_i32:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 7
-; RV32-NEXT: bltz a0, .LBB41_2
+; RV32-NEXT: bgez a0, .LBB41_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB41_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB41_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_24_z_branch_i32:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 39
-; RV64-NEXT: bltz a0, .LBB41_2
+; RV64-NEXT: bgez a0, .LBB41_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB41_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB41_2:
+; RV64-NEXT: tail bar
%2 = and i32 %0, 16777216
%3 = icmp eq i32 %2, 0
br i1 %3, label %4, label %5
@@ -1373,20 +1373,20 @@ define void @bit_24_nz_branch_i32(i32 signext %0) {
; RV32-LABEL: bit_24_nz_branch_i32:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 7
-; RV32-NEXT: bgez a0, .LBB42_2
+; RV32-NEXT: bltz a0, .LBB42_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB42_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB42_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_24_nz_branch_i32:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 39
-; RV64-NEXT: bgez a0, .LBB42_2
+; RV64-NEXT: bltz a0, .LBB42_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB42_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB42_2:
+; RV64-NEXT: tail bar
%2 = and i32 %0, 16777216
%3 = icmp ne i32 %2, 0
br i1 %3, label %4, label %5
@@ -1402,21 +1402,21 @@ define void @bit_24_nz_branch_i32(i32 signext %0) {
define void @bit_31_z_branch_i32(i32 signext %0) {
; RV32-LABEL: bit_31_z_branch_i32:
; RV32: # %bb.0:
-; RV32-NEXT: bltz a0, .LBB43_2
+; RV32-NEXT: bgez a0, .LBB43_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB43_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB43_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_31_z_branch_i32:
; RV64: # %bb.0:
; RV64-NEXT: lui a1, 524288
; RV64-NEXT: and a0, a0, a1
-; RV64-NEXT: bnez a0, .LBB43_2
+; RV64-NEXT: beqz a0, .LBB43_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB43_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB43_2:
+; RV64-NEXT: tail bar
%2 = and i32 %0, 2147483648
%3 = icmp eq i32 %2, 0
br i1 %3, label %4, label %5
@@ -1432,21 +1432,21 @@ define void @bit_31_z_branch_i32(i32 signext %0) {
define void @bit_31_nz_branch_i32(i32 signext %0) {
; RV32-LABEL: bit_31_nz_branch_i32:
; RV32: # %bb.0:
-; RV32-NEXT: bgez a0, .LBB44_2
+; RV32-NEXT: bltz a0, .LBB44_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB44_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB44_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_31_nz_branch_i32:
; RV64: # %bb.0:
; RV64-NEXT: lui a1, 524288
; RV64-NEXT: and a0, a0, a1
-; RV64-NEXT: beqz a0, .LBB44_2
+; RV64-NEXT: bnez a0, .LBB44_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB44_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB44_2:
+; RV64-NEXT: tail bar
%2 = and i32 %0, 2147483648
%3 = icmp ne i32 %2, 0
br i1 %3, label %4, label %5
@@ -1463,11 +1463,11 @@ define void @bit_10_z_branch_i64(i64 %0) {
; CHECK-LABEL: bit_10_z_branch_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 1024
-; CHECK-NEXT: bnez a0, .LBB45_2
+; CHECK-NEXT: beqz a0, .LBB45_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: tail bar
-; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB45_2:
+; CHECK-NEXT: tail bar
%2 = and i64 %0, 1024
%3 = icmp eq i64 %2, 0
br i1 %3, label %4, label %5
@@ -1484,11 +1484,11 @@ define void @bit_10_nz_branch_i64(i64 %0) {
; CHECK-LABEL: bit_10_nz_branch_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 1024
-; CHECK-NEXT: beqz a0, .LBB46_2
+; CHECK-NEXT: bnez a0, .LBB46_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: tail bar
-; CHECK-NEXT: .LBB46_2:
; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB46_2:
+; CHECK-NEXT: tail bar
%2 = and i64 %0, 1024
%3 = icmp ne i64 %2, 0
br i1 %3, label %4, label %5
@@ -1505,20 +1505,20 @@ define void @bit_11_z_branch_i64(i64 %0) {
; RV32-LABEL: bit_11_z_branch_i64:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 20
-; RV32-NEXT: bltz a0, .LBB47_2
+; RV32-NEXT: bgez a0, .LBB47_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB47_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB47_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_11_z_branch_i64:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 52
-; RV64-NEXT: bltz a0, .LBB47_2
+; RV64-NEXT: bgez a0, .LBB47_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB47_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB47_2:
+; RV64-NEXT: tail bar
%2 = and i64 %0, 2048
%3 = icmp eq i64 %2, 0
br i1 %3, label %4, label %5
@@ -1535,20 +1535,20 @@ define void @bit_11_nz_branch_i64(i64 %0) {
; RV32-LABEL: bit_11_nz_branch_i64:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 20
-; RV32-NEXT: bgez a0, .LBB48_2
+; RV32-NEXT: bltz a0, .LBB48_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB48_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB48_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_11_nz_branch_i64:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 52
-; RV64-NEXT: bgez a0, .LBB48_2
+; RV64-NEXT: bltz a0, .LBB48_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB48_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB48_2:
+; RV64-NEXT: tail bar
%2 = and i64 %0, 2048
%3 = icmp ne i64 %2, 0
br i1 %3, label %4, label %5
@@ -1565,20 +1565,20 @@ define void @bit_24_z_branch_i64(i64 %0) {
; RV32-LABEL: bit_24_z_branch_i64:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 7
-; RV32-NEXT: bltz a0, .LBB49_2
+; RV32-NEXT: bgez a0, .LBB49_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB49_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB49_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_24_z_branch_i64:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 39
-; RV64-NEXT: bltz a0, .LBB49_2
+; RV64-NEXT: bgez a0, .LBB49_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB49_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB49_2:
+; RV64-NEXT: tail bar
%2 = and i64 %0, 16777216
%3 = icmp eq i64 %2, 0
br i1 %3, label %4, label %5
@@ -1595,20 +1595,20 @@ define void @bit_24_nz_branch_i64(i64 %0) {
; RV32-LABEL: bit_24_nz_branch_i64:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 7
-; RV32-NEXT: bgez a0, .LBB50_2
+; RV32-NEXT: bltz a0, .LBB50_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB50_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB50_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_24_nz_branch_i64:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 39
-; RV64-NEXT: bgez a0, .LBB50_2
+; RV64-NEXT: bltz a0, .LBB50_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB50_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB50_2:
+; RV64-NEXT: tail bar
%2 = and i64 %0, 16777216
%3 = icmp ne i64 %2, 0
br i1 %3, label %4, label %5
@@ -1624,20 +1624,20 @@ define void @bit_24_nz_branch_i64(i64 %0) {
define void @bit_31_z_branch_i64(i64 %0) {
; RV32-LABEL: bit_31_z_branch_i64:
; RV32: # %bb.0:
-; RV32-NEXT: bltz a0, .LBB51_2
+; RV32-NEXT: bgez a0, .LBB51_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB51_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB51_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_31_z_branch_i64:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: bltz a0, .LBB51_2
+; RV64-NEXT: bgez a0, .LBB51_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB51_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB51_2:
+; RV64-NEXT: tail bar
%2 = and i64 %0, 2147483648
%3 = icmp eq i64 %2, 0
br i1 %3, label %4, label %5
@@ -1653,20 +1653,20 @@ define void @bit_31_z_branch_i64(i64 %0) {
define void @bit_31_nz_branch_i64(i64 %0) {
; RV32-LABEL: bit_31_nz_branch_i64:
; RV32: # %bb.0:
-; RV32-NEXT: bgez a0, .LBB52_2
+; RV32-NEXT: bltz a0, .LBB52_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB52_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB52_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_31_nz_branch_i64:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: bgez a0, .LBB52_2
+; RV64-NEXT: bltz a0, .LBB52_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB52_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB52_2:
+; RV64-NEXT: tail bar
%2 = and i64 %0, 2147483648
%3 = icmp ne i64 %2, 0
br i1 %3, label %4, label %5
@@ -1683,20 +1683,20 @@ define void @bit_32_z_branch_i64(i64 %0) {
; RV32-LABEL: bit_32_z_branch_i64:
; RV32: # %bb.0:
; RV32-NEXT: andi a1, a1, 1
-; RV32-NEXT: bnez a1, .LBB53_2
+; RV32-NEXT: beqz a1, .LBB53_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB53_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB53_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_32_z_branch_i64:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 31
-; RV64-NEXT: bltz a0, .LBB53_2
+; RV64-NEXT: bgez a0, .LBB53_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB53_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB53_2:
+; RV64-NEXT: tail bar
%2 = and i64 %0, 4294967296
%3 = icmp eq i64 %2, 0
br i1 %3, label %4, label %5
@@ -1713,20 +1713,20 @@ define void @bit_32_nz_branch_i64(i64 %0) {
; RV32-LABEL: bit_32_nz_branch_i64:
; RV32: # %bb.0:
; RV32-NEXT: andi a1, a1, 1
-; RV32-NEXT: beqz a1, .LBB54_2
+; RV32-NEXT: bnez a1, .LBB54_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB54_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB54_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_32_nz_branch_i64:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 31
-; RV64-NEXT: bgez a0, .LBB54_2
+; RV64-NEXT: bltz a0, .LBB54_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB54_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB54_2:
+; RV64-NEXT: tail bar
%2 = and i64 %0, 4294967296
%3 = icmp ne i64 %2, 0
br i1 %3, label %4, label %5
@@ -1743,20 +1743,20 @@ define void @bit_62_z_branch_i64(i64 %0) {
; RV32-LABEL: bit_62_z_branch_i64:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 1
-; RV32-NEXT: bltz a1, .LBB55_2
+; RV32-NEXT: bgez a1, .LBB55_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB55_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB55_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_62_z_branch_i64:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 1
-; RV64-NEXT: bltz a0, .LBB55_2
+; RV64-NEXT: bgez a0, .LBB55_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB55_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB55_2:
+; RV64-NEXT: tail bar
%2 = and i64 %0, 4611686018427387904
%3 = icmp eq i64 %2, 0
br i1 %3, label %4, label %5
@@ -1773,20 +1773,20 @@ define void @bit_62_nz_branch_i64(i64 %0) {
; RV32-LABEL: bit_62_nz_branch_i64:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 1
-; RV32-NEXT: bgez a1, .LBB56_2
+; RV32-NEXT: bltz a1, .LBB56_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB56_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB56_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_62_nz_branch_i64:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 1
-; RV64-NEXT: bgez a0, .LBB56_2
+; RV64-NEXT: bltz a0, .LBB56_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB56_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB56_2:
+; RV64-NEXT: tail bar
%2 = and i64 %0, 4611686018427387904
%3 = icmp ne i64 %2, 0
br i1 %3, label %4, label %5
@@ -1802,19 +1802,19 @@ define void @bit_62_nz_branch_i64(i64 %0) {
define void @bit_63_z_branch_i64(i64 %0) {
; RV32-LABEL: bit_63_z_branch_i64:
; RV32: # %bb.0:
-; RV32-NEXT: bltz a1, .LBB57_2
+; RV32-NEXT: bgez a1, .LBB57_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB57_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB57_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_63_z_branch_i64:
; RV64: # %bb.0:
-; RV64-NEXT: bltz a0, .LBB57_2
+; RV64-NEXT: bgez a0, .LBB57_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB57_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB57_2:
+; RV64-NEXT: tail bar
%2 = and i64 %0, 9223372036854775808
%3 = icmp eq i64 %2, 0
br i1 %3, label %4, label %5
@@ -1830,19 +1830,19 @@ define void @bit_63_z_branch_i64(i64 %0) {
define void @bit_63_nz_branch_i64(i64 %0) {
; RV32-LABEL: bit_63_nz_branch_i64:
; RV32: # %bb.0:
-; RV32-NEXT: bgez a1, .LBB58_2
+; RV32-NEXT: bltz a1, .LBB58_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB58_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB58_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_63_nz_branch_i64:
; RV64: # %bb.0:
-; RV64-NEXT: bgez a0, .LBB58_2
+; RV64-NEXT: bltz a0, .LBB58_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB58_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB58_2:
+; RV64-NEXT: tail bar
%2 = and i64 %0, 9223372036854775808
%3 = icmp ne i64 %2, 0
br i1 %3, label %4, label %5
@@ -2692,11 +2692,11 @@ define void @bit_10_1_nz_branch_i32(i32 signext %0) {
; CHECK-LABEL: bit_10_1_nz_branch_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 1023
-; CHECK-NEXT: beqz a0, .LBB90_2
+; CHECK-NEXT: bnez a0, .LBB90_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: tail bar
-; CHECK-NEXT: .LBB90_2:
; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB90_2:
+; CHECK-NEXT: tail bar
%2 = and i32 %0, 1023
%3 = icmp ne i32 %2, 0
br i1 %3, label %4, label %5
@@ -2734,11 +2734,11 @@ define void @bit_11_1_nz_branch_i32(i32 signext %0) {
; CHECK-LABEL: bit_11_1_nz_branch_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 2047
-; CHECK-NEXT: beqz a0, .LBB92_2
+; CHECK-NEXT: bnez a0, .LBB92_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: tail bar
-; CHECK-NEXT: .LBB92_2:
; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB92_2:
+; CHECK-NEXT: tail bar
%2 = and i32 %0, 2047
%3 = icmp ne i32 %2, 0
br i1 %3, label %4, label %5
@@ -2785,20 +2785,20 @@ define void @bit_16_1_nz_branch_i32(i32 signext %0) {
; RV32-LABEL: bit_16_1_nz_branch_i32:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: beqz a0, .LBB94_2
+; RV32-NEXT: bnez a0, .LBB94_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB94_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB94_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_16_1_nz_branch_i32:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 48
-; RV64-NEXT: beqz a0, .LBB94_2
+; RV64-NEXT: bnez a0, .LBB94_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB94_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB94_2:
+; RV64-NEXT: tail bar
%2 = and i32 %0, 65535
%3 = icmp ne i32 %2, 0
br i1 %3, label %4, label %5
@@ -2845,20 +2845,20 @@ define void @bit_24_1_nz_branch_i32(i32 signext %0) {
; RV32-LABEL: bit_24_1_nz_branch_i32:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 8
-; RV32-NEXT: beqz a0, .LBB96_2
+; RV32-NEXT: bnez a0, .LBB96_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB96_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB96_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_24_1_nz_branch_i32:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 40
-; RV64-NEXT: beqz a0, .LBB96_2
+; RV64-NEXT: bnez a0, .LBB96_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB96_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB96_2:
+; RV64-NEXT: tail bar
%2 = and i32 %0, 16777215
%3 = icmp ne i32 %2, 0
br i1 %3, label %4, label %5
@@ -2905,20 +2905,20 @@ define void @bit_31_1_nz_branch_i32(i32 signext %0) {
; RV32-LABEL: bit_31_1_nz_branch_i32:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: beqz a0, .LBB98_2
+; RV32-NEXT: bnez a0, .LBB98_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB98_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB98_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_31_1_nz_branch_i32:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 33
-; RV64-NEXT: beqz a0, .LBB98_2
+; RV64-NEXT: bnez a0, .LBB98_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB98_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB98_2:
+; RV64-NEXT: tail bar
%2 = and i32 %0, 2147483647
%3 = icmp ne i32 %2, 0
br i1 %3, label %4, label %5
@@ -2954,11 +2954,11 @@ define void @bit_32_1_z_branch_i32(i32 signext %0) {
define void @bit_32_1_nz_branch_i32(i32 signext %0) {
; CHECK-LABEL: bit_32_1_nz_branch_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a0, .LBB100_2
+; CHECK-NEXT: bnez a0, .LBB100_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: tail bar
-; CHECK-NEXT: .LBB100_2:
; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB100_2:
+; CHECK-NEXT: tail bar
%2 = and i32 %0, 4294967295
%3 = icmp ne i32 %2, 0
br i1 %3, label %4, label %5
@@ -2997,11 +2997,11 @@ define void @bit_10_1_nz_branch_i64(i64 %0) {
; CHECK-LABEL: bit_10_1_nz_branch_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 1023
-; CHECK-NEXT: beqz a0, .LBB102_2
+; CHECK-NEXT: bnez a0, .LBB102_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: tail bar
-; CHECK-NEXT: .LBB102_2:
; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB102_2:
+; CHECK-NEXT: tail bar
%2 = and i64 %0, 1023
%3 = icmp ne i64 %2, 0
br i1 %3, label %4, label %5
@@ -3039,11 +3039,11 @@ define void @bit_11_1_nz_branch_i64(i64 %0) {
; CHECK-LABEL: bit_11_1_nz_branch_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 2047
-; CHECK-NEXT: beqz a0, .LBB104_2
+; CHECK-NEXT: bnez a0, .LBB104_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: tail bar
-; CHECK-NEXT: .LBB104_2:
; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB104_2:
+; CHECK-NEXT: tail bar
%2 = and i64 %0, 2047
%3 = icmp ne i64 %2, 0
br i1 %3, label %4, label %5
@@ -3090,20 +3090,20 @@ define void @bit_16_1_nz_branch_i64(i64 %0) {
; RV32-LABEL: bit_16_1_nz_branch_i64:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: beqz a0, .LBB106_2
+; RV32-NEXT: bnez a0, .LBB106_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB106_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB106_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_16_1_nz_branch_i64:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 48
-; RV64-NEXT: beqz a0, .LBB106_2
+; RV64-NEXT: bnez a0, .LBB106_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB106_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB106_2:
+; RV64-NEXT: tail bar
%2 = and i64 %0, 65535
%3 = icmp ne i64 %2, 0
br i1 %3, label %4, label %5
@@ -3150,20 +3150,20 @@ define void @bit_24_1_nz_branch_i64(i64 %0) {
; RV32-LABEL: bit_24_1_nz_branch_i64:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 8
-; RV32-NEXT: beqz a0, .LBB108_2
+; RV32-NEXT: bnez a0, .LBB108_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB108_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB108_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_24_1_nz_branch_i64:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 40
-; RV64-NEXT: beqz a0, .LBB108_2
+; RV64-NEXT: bnez a0, .LBB108_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB108_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB108_2:
+; RV64-NEXT: tail bar
%2 = and i64 %0, 16777215
%3 = icmp ne i64 %2, 0
br i1 %3, label %4, label %5
@@ -3210,20 +3210,20 @@ define void @bit_31_1_nz_branch_i64(i64 %0) {
; RV32-LABEL: bit_31_1_nz_branch_i64:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: beqz a0, .LBB110_2
+; RV32-NEXT: bnez a0, .LBB110_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB110_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB110_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_31_1_nz_branch_i64:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 33
-; RV64-NEXT: beqz a0, .LBB110_2
+; RV64-NEXT: bnez a0, .LBB110_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB110_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB110_2:
+; RV64-NEXT: tail bar
%2 = and i64 %0, 2147483647
%3 = icmp ne i64 %2, 0
br i1 %3, label %4, label %5
@@ -3268,20 +3268,20 @@ define void @bit_32_1_z_branch_i64(i64 %0) {
define void @bit_32_1_nz_branch_i64(i64 %0) {
; RV32-LABEL: bit_32_1_nz_branch_i64:
; RV32: # %bb.0:
-; RV32-NEXT: beqz a0, .LBB112_2
+; RV32-NEXT: bnez a0, .LBB112_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB112_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB112_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_32_1_nz_branch_i64:
; RV64: # %bb.0:
; RV64-NEXT: sext.w a0, a0
-; RV64-NEXT: beqz a0, .LBB112_2
+; RV64-NEXT: bnez a0, .LBB112_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB112_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB112_2:
+; RV64-NEXT: tail bar
%2 = and i64 %0, 4294967295
%3 = icmp ne i64 %2, 0
br i1 %3, label %4, label %5
@@ -3332,20 +3332,20 @@ define void @bit_62_1_nz_branch_i64(i64 %0) {
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: srli a1, a1, 2
; RV32-NEXT: or a0, a0, a1
-; RV32-NEXT: beqz a0, .LBB114_2
+; RV32-NEXT: bnez a0, .LBB114_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB114_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB114_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_62_1_nz_branch_i64:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 2
-; RV64-NEXT: beqz a0, .LBB114_2
+; RV64-NEXT: bnez a0, .LBB114_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB114_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB114_2:
+; RV64-NEXT: tail bar
%2 = and i64 %0, 4611686018427387903
%3 = icmp ne i64 %2, 0
br i1 %3, label %4, label %5
@@ -3417,41 +3417,41 @@ define void @bit_63_1_nz_branch_i64(i64 %0) {
; RV32I-NEXT: slli a1, a1, 1
; RV32I-NEXT: srli a1, a1, 1
; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: beqz a0, .LBB116_2
+; RV32I-NEXT: bnez a0, .LBB116_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: tail bar
-; RV32I-NEXT: .LBB116_2:
; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB116_2:
+; RV32I-NEXT: tail bar
;
; RV64-LABEL: bit_63_1_nz_branch_i64:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 1
-; RV64-NEXT: beqz a0, .LBB116_2
+; RV64-NEXT: bnez a0, .LBB116_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB116_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB116_2:
+; RV64-NEXT: tail bar
;
; RV32ZBS-LABEL: bit_63_1_nz_branch_i64:
; RV32ZBS: # %bb.0:
; RV32ZBS-NEXT: bclri a1, a1, 31
; RV32ZBS-NEXT: or a0, a0, a1
-; RV32ZBS-NEXT: beqz a0, .LBB116_2
+; RV32ZBS-NEXT: bnez a0, .LBB116_2
; RV32ZBS-NEXT: # %bb.1:
-; RV32ZBS-NEXT: tail bar
-; RV32ZBS-NEXT: .LBB116_2:
; RV32ZBS-NEXT: ret
+; RV32ZBS-NEXT: .LBB116_2:
+; RV32ZBS-NEXT: tail bar
;
; RV32XTHEADBS-LABEL: bit_63_1_nz_branch_i64:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: slli a1, a1, 1
; RV32XTHEADBS-NEXT: srli a1, a1, 1
; RV32XTHEADBS-NEXT: or a0, a0, a1
-; RV32XTHEADBS-NEXT: beqz a0, .LBB116_2
+; RV32XTHEADBS-NEXT: bnez a0, .LBB116_2
; RV32XTHEADBS-NEXT: # %bb.1:
-; RV32XTHEADBS-NEXT: tail bar
-; RV32XTHEADBS-NEXT: .LBB116_2:
; RV32XTHEADBS-NEXT: ret
+; RV32XTHEADBS-NEXT: .LBB116_2:
+; RV32XTHEADBS-NEXT: tail bar
%2 = and i64 %0, 9223372036854775807
%3 = icmp ne i64 %2, 0
br i1 %3, label %4, label %5
@@ -3497,19 +3497,19 @@ define void @bit_64_1_nz_branch_i64(i64 %0) {
; RV32-LABEL: bit_64_1_nz_branch_i64:
; RV32: # %bb.0:
; RV32-NEXT: or a0, a0, a1
-; RV32-NEXT: beqz a0, .LBB118_2
+; RV32-NEXT: bnez a0, .LBB118_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: tail bar
-; RV32-NEXT: .LBB118_2:
; RV32-NEXT: ret
+; RV32-NEXT: .LBB118_2:
+; RV32-NEXT: tail bar
;
; RV64-LABEL: bit_64_1_nz_branch_i64:
; RV64: # %bb.0:
-; RV64-NEXT: beqz a0, .LBB118_2
+; RV64-NEXT: bnez a0, .LBB118_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar
-; RV64-NEXT: .LBB118_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB118_2:
+; RV64-NEXT: tail bar
%2 = and i64 %0, 18446744073709551615
%3 = icmp ne i64 %2, 0
br i1 %3, label %4, label %5
diff --git a/llvm/test/CodeGen/RISCV/branch_zero.ll b/llvm/test/CodeGen/RISCV/branch_zero.ll
index 9f96f0d94a27a..93b99a8683d0c 100644
--- a/llvm/test/CodeGen/RISCV/branch_zero.ll
+++ b/llvm/test/CodeGen/RISCV/branch_zero.ll
@@ -5,13 +5,12 @@
define void @foo(i16 %finder_idx) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: # %bb.1: # %for.body
; CHECK-NEXT: slli a0, a0, 48
-; CHECK-NEXT: bltz a0, .LBB0_4
-; CHECK-NEXT: # %bb.2: # %while.cond.preheader.i
+; CHECK-NEXT: bltz a0, .LBB0_3
+; CHECK-NEXT: # %bb.1: # %while.cond.preheader.i
; CHECK-NEXT: li a0, 0
-; CHECK-NEXT: # %bb.3: # %while.body
-; CHECK-NEXT: .LBB0_4: # %while.cond1.preheader.i
+; CHECK-NEXT: # %bb.2: # %while.body
+; CHECK-NEXT: .LBB0_3: # %while.cond1.preheader.i
entry:
br label %for.body
@@ -43,13 +42,12 @@ if.then:
define void @bar(i16 %finder_idx) {
; CHECK-LABEL: bar:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: # %bb.1: # %for.body
; CHECK-NEXT: slli a0, a0, 48
-; CHECK-NEXT: bgez a0, .LBB1_4
-; CHECK-NEXT: # %bb.2: # %while.cond.preheader.i
+; CHECK-NEXT: bgez a0, .LBB1_3
+; CHECK-NEXT: # %bb.1: # %while.cond.preheader.i
; CHECK-NEXT: li a0, 0
-; CHECK-NEXT: # %bb.3: # %while.body
-; CHECK-NEXT: .LBB1_4: # %while.cond1.preheader.i
+; CHECK-NEXT: # %bb.2: # %while.body
+; CHECK-NEXT: .LBB1_3: # %while.cond1.preheader.i
entry:
br label %for.body
diff --git a/llvm/test/CodeGen/RISCV/cmp-bool.ll b/llvm/test/CodeGen/RISCV/cmp-bool.ll
index 547e12cce0a06..a4247bf37cf5c 100644
--- a/llvm/test/CodeGen/RISCV/cmp-bool.ll
+++ b/llvm/test/CodeGen/RISCV/cmp-bool.ll
@@ -33,19 +33,19 @@ if.end:
define void @bool_ne(i1 zeroext %a, i1 zeroext %b, ptr nocapture %c) nounwind {
; RV32-LABEL: bool_ne:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: beq a0, a1, .LBB1_2
-; RV32-NEXT: # %bb.1: # %if.then
-; RV32-NEXT: jr a2
-; RV32-NEXT: .LBB1_2: # %if.end
+; RV32-NEXT: bne a0, a1, .LBB1_2
+; RV32-NEXT: # %bb.1: # %if.end
; RV32-NEXT: ret
+; RV32-NEXT: .LBB1_2: # %if.then
+; RV32-NEXT: jr a2
;
; RV64-LABEL: bool_ne:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: beq a0, a1, .LBB1_2
-; RV64-NEXT: # %bb.1: # %if.then
-; RV64-NEXT: jr a2
-; RV64-NEXT: .LBB1_2: # %if.end
+; RV64-NEXT: bne a0, a1, .LBB1_2
+; RV64-NEXT: # %bb.1: # %if.end
; RV64-NEXT: ret
+; RV64-NEXT: .LBB1_2: # %if.then
+; RV64-NEXT: jr a2
entry:
%cmp = xor i1 %a, %b
br i1 %cmp, label %if.then, label %if.end
diff --git a/llvm/test/CodeGen/RISCV/copyprop.ll b/llvm/test/CodeGen/RISCV/copyprop.ll
index ddf58403382c5..b87f8baa77964 100644
--- a/llvm/test/CodeGen/RISCV/copyprop.ll
+++ b/llvm/test/CodeGen/RISCV/copyprop.ll
@@ -7,23 +7,21 @@ define void @copyprop_after_mbp(i32 %v, ptr %a, ptr %b, ptr %c, ptr %d) {
; NOPROP: # %bb.0:
; NOPROP-NEXT: sext.w a0, a0
; NOPROP-NEXT: li a5, 10
-; NOPROP-NEXT: bne a0, a5, .LBB0_2
+; NOPROP-NEXT: bne a0, a5, .LBB0_3
; NOPROP-NEXT: # %bb.1: # %bb.0
; NOPROP-NEXT: li a0, 15
; NOPROP-NEXT: sw a0, 0(a2)
; NOPROP-NEXT: li a0, 1
+; NOPROP-NEXT: .LBB0_2: # %bb.0
; NOPROP-NEXT: sw a0, 0(a1)
; NOPROP-NEXT: li a0, 12
; NOPROP-NEXT: sw a0, 0(a4)
; NOPROP-NEXT: ret
-; NOPROP-NEXT: .LBB0_2: # %bb.1
+; NOPROP-NEXT: .LBB0_3: # %bb.1
; NOPROP-NEXT: li a0, 0
; NOPROP-NEXT: li a2, 25
; NOPROP-NEXT: sw a2, 0(a3)
-; NOPROP-NEXT: sw a0, 0(a1)
-; NOPROP-NEXT: li a0, 12
-; NOPROP-NEXT: sw a0, 0(a4)
-; NOPROP-NEXT: ret
+; NOPROP-NEXT: j .LBB0_2
;
; PROP-LABEL: copyprop_after_mbp:
; PROP: # %bb.0:
@@ -35,13 +33,12 @@ define void @copyprop_after_mbp(i32 %v, ptr %a, ptr %b, ptr %c, ptr %d) {
; PROP-NEXT: sw a0, 0(a2)
; PROP-NEXT: li a0, 1
; PROP-NEXT: sw a0, 0(a1)
-; PROP-NEXT: li a0, 12
-; PROP-NEXT: sw a0, 0(a4)
-; PROP-NEXT: ret
+; PROP-NEXT: j .LBB0_3
; PROP-NEXT: .LBB0_2: # %bb.1
; PROP-NEXT: li a2, 25
; PROP-NEXT: sw a2, 0(a3)
; PROP-NEXT: sw zero, 0(a1)
+; PROP-NEXT: .LBB0_3: # %bb.1
; PROP-NEXT: li a0, 12
; PROP-NEXT: sw a0, 0(a4)
; PROP-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/csr-first-use-cost.ll b/llvm/test/CodeGen/RISCV/csr-first-use-cost.ll
index c6e5bae3c3c24..5a16d3d4992cd 100644
--- a/llvm/test/CodeGen/RISCV/csr-first-use-cost.ll
+++ b/llvm/test/CodeGen/RISCV/csr-first-use-cost.ll
@@ -13,28 +13,22 @@ define fastcc void @Perl_sv_setnv(i8 %c, ptr %.str.54.3682) nounwind {
; ZERO-COST-NEXT: li a2, 2
; ZERO-COST-NEXT: blt a2, a0, .LBB0_3
; ZERO-COST-NEXT: # %bb.1: # %entry
-; ZERO-COST-NEXT: beqz a0, .LBB0_4
+; ZERO-COST-NEXT: beqz a0, .LBB0_7
; ZERO-COST-NEXT: # %bb.2: # %entry
; ZERO-COST-NEXT: mv s0, a1
; ZERO-COST-NEXT: li a1, 1
-; ZERO-COST-NEXT: beq a0, a1, .LBB0_6
-; ZERO-COST-NEXT: j .LBB0_7
+; ZERO-COST-NEXT: beq a0, a1, .LBB0_5
+; ZERO-COST-NEXT: j .LBB0_6
; ZERO-COST-NEXT: .LBB0_3: # %entry
; ZERO-COST-NEXT: li a2, 3
-; ZERO-COST-NEXT: bne a0, a2, .LBB0_5
-; ZERO-COST-NEXT: .LBB0_4: # %sw.bb3
-; ZERO-COST-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; ZERO-COST-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; ZERO-COST-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; ZERO-COST-NEXT: addi sp, sp, 32
-; ZERO-COST-NEXT: ret
-; ZERO-COST-NEXT: .LBB0_5: # %entry
+; ZERO-COST-NEXT: beq a0, a2, .LBB0_7
+; ZERO-COST-NEXT: # %bb.4: # %entry
; ZERO-COST-NEXT: mv s0, a1
; ZERO-COST-NEXT: li a1, 12
-; ZERO-COST-NEXT: bne a0, a1, .LBB0_7
-; ZERO-COST-NEXT: .LBB0_6: # %sw.bb34.i
+; ZERO-COST-NEXT: bne a0, a1, .LBB0_6
+; ZERO-COST-NEXT: .LBB0_5: # %sw.bb34.i
; ZERO-COST-NEXT: li s0, 0
-; ZERO-COST-NEXT: .LBB0_7: # %Perl_sv_reftype.exit
+; ZERO-COST-NEXT: .LBB0_6: # %Perl_sv_reftype.exit
; ZERO-COST-NEXT: li s1, 0
; ZERO-COST-NEXT: li a0, 0
; ZERO-COST-NEXT: li a1, 0
@@ -43,6 +37,12 @@ define fastcc void @Perl_sv_setnv(i8 %c, ptr %.str.54.3682) nounwind {
; ZERO-COST-NEXT: mv a1, s0
; ZERO-COST-NEXT: li a2, 0
; ZERO-COST-NEXT: jalr s1
+; ZERO-COST-NEXT: .LBB0_7: # %sw.bb3
+; ZERO-COST-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; ZERO-COST-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; ZERO-COST-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; ZERO-COST-NEXT: addi sp, sp, 32
+; ZERO-COST-NEXT: ret
;
; DEFAULT-COST-LABEL: Perl_sv_setnv:
; DEFAULT-COST: # %bb.0: # %entry
@@ -53,27 +53,22 @@ define fastcc void @Perl_sv_setnv(i8 %c, ptr %.str.54.3682) nounwind {
; DEFAULT-COST-NEXT: li a2, 2
; DEFAULT-COST-NEXT: blt a2, a0, .LBB0_3
; DEFAULT-COST-NEXT: # %bb.1: # %entry
-; DEFAULT-COST-NEXT: beqz a0, .LBB0_4
+; DEFAULT-COST-NEXT: beqz a0, .LBB0_7
; DEFAULT-COST-NEXT: # %bb.2: # %entry
; DEFAULT-COST-NEXT: sd a1, 8(sp) # 8-byte Folded Spill
; DEFAULT-COST-NEXT: li a1, 1
-; DEFAULT-COST-NEXT: beq a0, a1, .LBB0_6
-; DEFAULT-COST-NEXT: j .LBB0_7
+; DEFAULT-COST-NEXT: beq a0, a1, .LBB0_5
+; DEFAULT-COST-NEXT: j .LBB0_6
; DEFAULT-COST-NEXT: .LBB0_3: # %entry
; DEFAULT-COST-NEXT: li a2, 3
-; DEFAULT-COST-NEXT: bne a0, a2, .LBB0_5
-; DEFAULT-COST-NEXT: .LBB0_4: # %sw.bb3
-; DEFAULT-COST-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; DEFAULT-COST-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; DEFAULT-COST-NEXT: addi sp, sp, 32
-; DEFAULT-COST-NEXT: ret
-; DEFAULT-COST-NEXT: .LBB0_5: # %entry
+; DEFAULT-COST-NEXT: beq a0, a2, .LBB0_7
+; DEFAULT-COST-NEXT: # %bb.4: # %entry
; DEFAULT-COST-NEXT: sd a1, 8(sp) # 8-byte Folded Spill
; DEFAULT-COST-NEXT: li a1, 12
-; DEFAULT-COST-NEXT: bne a0, a1, .LBB0_7
-; DEFAULT-COST-NEXT: .LBB0_6: # %sw.bb34.i
+; DEFAULT-COST-NEXT: bne a0, a1, .LBB0_6
+; DEFAULT-COST-NEXT: .LBB0_5: # %sw.bb34.i
; DEFAULT-COST-NEXT: sd zero, 8(sp) # 8-byte Folded Spill
-; DEFAULT-COST-NEXT: .LBB0_7: # %Perl_sv_reftype.exit
+; DEFAULT-COST-NEXT: .LBB0_6: # %Perl_sv_reftype.exit
; DEFAULT-COST-NEXT: li s0, 0
; DEFAULT-COST-NEXT: li a0, 0
; DEFAULT-COST-NEXT: li a1, 0
@@ -82,6 +77,11 @@ define fastcc void @Perl_sv_setnv(i8 %c, ptr %.str.54.3682) nounwind {
; DEFAULT-COST-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
; DEFAULT-COST-NEXT: li a2, 0
; DEFAULT-COST-NEXT: jalr s0
+; DEFAULT-COST-NEXT: .LBB0_7: # %sw.bb3
+; DEFAULT-COST-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; DEFAULT-COST-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; DEFAULT-COST-NEXT: addi sp, sp, 32
+; DEFAULT-COST-NEXT: ret
entry:
switch i8 %c, label %Perl_sv_reftype.exit [
i8 1, label %sw.bb4
diff --git a/llvm/test/CodeGen/RISCV/double-br-fcmp.ll b/llvm/test/CodeGen/RISCV/double-br-fcmp.ll
index b2c882878f8bc..bea51fc4322d0 100644
--- a/llvm/test/CodeGen/RISCV/double-br-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/double-br-fcmp.ll
@@ -14,40 +14,24 @@ declare void @exit(i32)
define void @br_fcmp_false(double %a, double %b) nounwind {
; RV32IFD-LABEL: br_fcmp_false:
; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: j .LBB0_2
-; RV32IFD-NEXT: # %bb.1: # %if.then
-; RV32IFD-NEXT: ret
-; RV32IFD-NEXT: .LBB0_2: # %if.else
; RV32IFD-NEXT: addi sp, sp, -16
; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: call abort
;
; RV64IFD-LABEL: br_fcmp_false:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: j .LBB0_2
-; RV64IFD-NEXT: # %bb.1: # %if.then
-; RV64IFD-NEXT: ret
-; RV64IFD-NEXT: .LBB0_2: # %if.else
; RV64IFD-NEXT: addi sp, sp, -16
; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IFD-NEXT: call abort
;
; RV32IZFINXZDINX-LABEL: br_fcmp_false:
; RV32IZFINXZDINX: # %bb.0:
-; RV32IZFINXZDINX-NEXT: j .LBB0_2
-; RV32IZFINXZDINX-NEXT: # %bb.1: # %if.then
-; RV32IZFINXZDINX-NEXT: ret
-; RV32IZFINXZDINX-NEXT: .LBB0_2: # %if.else
; RV32IZFINXZDINX-NEXT: addi sp, sp, -16
; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: call abort
;
; RV64IZFINXZDINX-LABEL: br_fcmp_false:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: j .LBB0_2
-; RV64IZFINXZDINX-NEXT: # %bb.1: # %if.then
-; RV64IZFINXZDINX-NEXT: ret
-; RV64IZFINXZDINX-NEXT: .LBB0_2: # %if.else
; RV64IZFINXZDINX-NEXT: addi sp, sp, -16
; RV64IZFINXZDINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFINXZDINX-NEXT: call abort
@@ -893,40 +877,24 @@ if.then:
define void @br_fcmp_true(double %a, double %b) nounwind {
; RV32IFD-LABEL: br_fcmp_true:
; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: j .LBB16_2
-; RV32IFD-NEXT: # %bb.1: # %if.else
-; RV32IFD-NEXT: ret
-; RV32IFD-NEXT: .LBB16_2: # %if.then
; RV32IFD-NEXT: addi sp, sp, -16
; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: call abort
;
; RV64IFD-LABEL: br_fcmp_true:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: j .LBB16_2
-; RV64IFD-NEXT: # %bb.1: # %if.else
-; RV64IFD-NEXT: ret
-; RV64IFD-NEXT: .LBB16_2: # %if.then
; RV64IFD-NEXT: addi sp, sp, -16
; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IFD-NEXT: call abort
;
; RV32IZFINXZDINX-LABEL: br_fcmp_true:
; RV32IZFINXZDINX: # %bb.0:
-; RV32IZFINXZDINX-NEXT: j .LBB16_2
-; RV32IZFINXZDINX-NEXT: # %bb.1: # %if.else
-; RV32IZFINXZDINX-NEXT: ret
-; RV32IZFINXZDINX-NEXT: .LBB16_2: # %if.then
; RV32IZFINXZDINX-NEXT: addi sp, sp, -16
; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: call abort
;
; RV64IZFINXZDINX-LABEL: br_fcmp_true:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: j .LBB16_2
-; RV64IZFINXZDINX-NEXT: # %bb.1: # %if.else
-; RV64IZFINXZDINX-NEXT: ret
-; RV64IZFINXZDINX-NEXT: .LBB16_2: # %if.then
; RV64IZFINXZDINX-NEXT: addi sp, sp, -16
; RV64IZFINXZDINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFINXZDINX-NEXT: call abort
diff --git a/llvm/test/CodeGen/RISCV/double-maximum-minimum.ll b/llvm/test/CodeGen/RISCV/double-maximum-minimum.ll
index 4ee01cc48b9f0..d3d7dea5ea2be 100644
--- a/llvm/test/CodeGen/RISCV/double-maximum-minimum.ll
+++ b/llvm/test/CodeGen/RISCV/double-maximum-minimum.ll
@@ -20,16 +20,15 @@ define double @fminimum_f64(double %a, double %b) nounwind {
; CHECKIFD-NEXT: feq.d a0, fa0, fa0
; CHECKIFD-NEXT: fmv.d fa5, fa1
; CHECKIFD-NEXT: beqz a0, .LBB0_3
-; CHECKIFD-NEXT: # %bb.1:
+; CHECKIFD-NEXT: .LBB0_1:
; CHECKIFD-NEXT: feq.d a0, fa1, fa1
; CHECKIFD-NEXT: beqz a0, .LBB0_4
-; CHECKIFD-NEXT: .LBB0_2:
+; CHECKIFD-NEXT: # %bb.2:
; CHECKIFD-NEXT: fmin.d fa0, fa0, fa5
; CHECKIFD-NEXT: ret
; CHECKIFD-NEXT: .LBB0_3:
; CHECKIFD-NEXT: fmv.d fa5, fa0
-; CHECKIFD-NEXT: feq.d a0, fa1, fa1
-; CHECKIFD-NEXT: bnez a0, .LBB0_2
+; CHECKIFD-NEXT: j .LBB0_1
; CHECKIFD-NEXT: .LBB0_4:
; CHECKIFD-NEXT: fmin.d fa0, fa1, fa5
; CHECKIFD-NEXT: ret
@@ -40,17 +39,16 @@ define double @fminimum_f64(double %a, double %b) nounwind {
; RV32IZFINXZDINX-NEXT: mv a4, a2
; RV32IZFINXZDINX-NEXT: mv a5, a3
; RV32IZFINXZDINX-NEXT: beqz a6, .LBB0_3
-; RV32IZFINXZDINX-NEXT: # %bb.1:
+; RV32IZFINXZDINX-NEXT: .LBB0_1:
; RV32IZFINXZDINX-NEXT: feq.d a6, a2, a2
; RV32IZFINXZDINX-NEXT: beqz a6, .LBB0_4
-; RV32IZFINXZDINX-NEXT: .LBB0_2:
+; RV32IZFINXZDINX-NEXT: # %bb.2:
; RV32IZFINXZDINX-NEXT: fmin.d a0, a0, a4
; RV32IZFINXZDINX-NEXT: ret
; RV32IZFINXZDINX-NEXT: .LBB0_3:
; RV32IZFINXZDINX-NEXT: mv a4, a0
; RV32IZFINXZDINX-NEXT: mv a5, a1
-; RV32IZFINXZDINX-NEXT: feq.d a6, a2, a2
-; RV32IZFINXZDINX-NEXT: bnez a6, .LBB0_2
+; RV32IZFINXZDINX-NEXT: j .LBB0_1
; RV32IZFINXZDINX-NEXT: .LBB0_4:
; RV32IZFINXZDINX-NEXT: mv a0, a2
; RV32IZFINXZDINX-NEXT: mv a1, a3
@@ -62,16 +60,15 @@ define double @fminimum_f64(double %a, double %b) nounwind {
; RV64IZFINXZDINX-NEXT: feq.d a3, a0, a0
; RV64IZFINXZDINX-NEXT: mv a2, a1
; RV64IZFINXZDINX-NEXT: beqz a3, .LBB0_3
-; RV64IZFINXZDINX-NEXT: # %bb.1:
+; RV64IZFINXZDINX-NEXT: .LBB0_1:
; RV64IZFINXZDINX-NEXT: feq.d a3, a1, a1
; RV64IZFINXZDINX-NEXT: beqz a3, .LBB0_4
-; RV64IZFINXZDINX-NEXT: .LBB0_2:
+; RV64IZFINXZDINX-NEXT: # %bb.2:
; RV64IZFINXZDINX-NEXT: fmin.d a0, a0, a2
; RV64IZFINXZDINX-NEXT: ret
; RV64IZFINXZDINX-NEXT: .LBB0_3:
; RV64IZFINXZDINX-NEXT: mv a2, a0
-; RV64IZFINXZDINX-NEXT: feq.d a3, a1, a1
-; RV64IZFINXZDINX-NEXT: bnez a3, .LBB0_2
+; RV64IZFINXZDINX-NEXT: j .LBB0_1
; RV64IZFINXZDINX-NEXT: .LBB0_4:
; RV64IZFINXZDINX-NEXT: fmin.d a0, a1, a2
; RV64IZFINXZDINX-NEXT: ret
@@ -87,16 +84,15 @@ define double @fmaximum_f64(double %a, double %b) nounwind {
; CHECKIFD-NEXT: feq.d a0, fa0, fa0
; CHECKIFD-NEXT: fmv.d fa5, fa1
; CHECKIFD-NEXT: beqz a0, .LBB1_3
-; CHECKIFD-NEXT: # %bb.1:
+; CHECKIFD-NEXT: .LBB1_1:
; CHECKIFD-NEXT: feq.d a0, fa1, fa1
; CHECKIFD-NEXT: beqz a0, .LBB1_4
-; CHECKIFD-NEXT: .LBB1_2:
+; CHECKIFD-NEXT: # %bb.2:
; CHECKIFD-NEXT: fmax.d fa0, fa0, fa5
; CHECKIFD-NEXT: ret
; CHECKIFD-NEXT: .LBB1_3:
; CHECKIFD-NEXT: fmv.d fa5, fa0
-; CHECKIFD-NEXT: feq.d a0, fa1, fa1
-; CHECKIFD-NEXT: bnez a0, .LBB1_2
+; CHECKIFD-NEXT: j .LBB1_1
; CHECKIFD-NEXT: .LBB1_4:
; CHECKIFD-NEXT: fmax.d fa0, fa1, fa5
; CHECKIFD-NEXT: ret
@@ -107,17 +103,16 @@ define double @fmaximum_f64(double %a, double %b) nounwind {
; RV32IZFINXZDINX-NEXT: mv a4, a2
; RV32IZFINXZDINX-NEXT: mv a5, a3
; RV32IZFINXZDINX-NEXT: beqz a6, .LBB1_3
-; RV32IZFINXZDINX-NEXT: # %bb.1:
+; RV32IZFINXZDINX-NEXT: .LBB1_1:
; RV32IZFINXZDINX-NEXT: feq.d a6, a2, a2
; RV32IZFINXZDINX-NEXT: beqz a6, .LBB1_4
-; RV32IZFINXZDINX-NEXT: .LBB1_2:
+; RV32IZFINXZDINX-NEXT: # %bb.2:
; RV32IZFINXZDINX-NEXT: fmax.d a0, a0, a4
; RV32IZFINXZDINX-NEXT: ret
; RV32IZFINXZDINX-NEXT: .LBB1_3:
; RV32IZFINXZDINX-NEXT: mv a4, a0
; RV32IZFINXZDINX-NEXT: mv a5, a1
-; RV32IZFINXZDINX-NEXT: feq.d a6, a2, a2
-; RV32IZFINXZDINX-NEXT: bnez a6, .LBB1_2
+; RV32IZFINXZDINX-NEXT: j .LBB1_1
; RV32IZFINXZDINX-NEXT: .LBB1_4:
; RV32IZFINXZDINX-NEXT: mv a0, a2
; RV32IZFINXZDINX-NEXT: mv a1, a3
@@ -129,16 +124,15 @@ define double @fmaximum_f64(double %a, double %b) nounwind {
; RV64IZFINXZDINX-NEXT: feq.d a3, a0, a0
; RV64IZFINXZDINX-NEXT: mv a2, a1
; RV64IZFINXZDINX-NEXT: beqz a3, .LBB1_3
-; RV64IZFINXZDINX-NEXT: # %bb.1:
+; RV64IZFINXZDINX-NEXT: .LBB1_1:
; RV64IZFINXZDINX-NEXT: feq.d a3, a1, a1
; RV64IZFINXZDINX-NEXT: beqz a3, .LBB1_4
-; RV64IZFINXZDINX-NEXT: .LBB1_2:
+; RV64IZFINXZDINX-NEXT: # %bb.2:
; RV64IZFINXZDINX-NEXT: fmax.d a0, a0, a2
; RV64IZFINXZDINX-NEXT: ret
; RV64IZFINXZDINX-NEXT: .LBB1_3:
; RV64IZFINXZDINX-NEXT: mv a2, a0
-; RV64IZFINXZDINX-NEXT: feq.d a3, a1, a1
-; RV64IZFINXZDINX-NEXT: bnez a3, .LBB1_2
+; RV64IZFINXZDINX-NEXT: j .LBB1_1
; RV64IZFINXZDINX-NEXT: .LBB1_4:
; RV64IZFINXZDINX-NEXT: fmax.d a0, a1, a2
; RV64IZFINXZDINX-NEXT: ret
@@ -171,16 +165,15 @@ define double @fmaximum_nnan_f64(double %a, double %b) nounwind {
; CHECKIFD-NEXT: feq.d a0, fa0, fa0
; CHECKIFD-NEXT: fmv.d fa5, fa1
; CHECKIFD-NEXT: beqz a0, .LBB3_3
-; CHECKIFD-NEXT: # %bb.1:
+; CHECKIFD-NEXT: .LBB3_1:
; CHECKIFD-NEXT: feq.d a0, fa1, fa1
; CHECKIFD-NEXT: beqz a0, .LBB3_4
-; CHECKIFD-NEXT: .LBB3_2:
+; CHECKIFD-NEXT: # %bb.2:
; CHECKIFD-NEXT: fmin.d fa0, fa0, fa5
; CHECKIFD-NEXT: ret
; CHECKIFD-NEXT: .LBB3_3:
; CHECKIFD-NEXT: fmv.d fa5, fa0
-; CHECKIFD-NEXT: feq.d a0, fa1, fa1
-; CHECKIFD-NEXT: bnez a0, .LBB3_2
+; CHECKIFD-NEXT: j .LBB3_1
; CHECKIFD-NEXT: .LBB3_4:
; CHECKIFD-NEXT: fmin.d fa0, fa1, fa5
; CHECKIFD-NEXT: ret
@@ -191,17 +184,16 @@ define double @fmaximum_nnan_f64(double %a, double %b) nounwind {
; RV32IZFINXZDINX-NEXT: mv a4, a2
; RV32IZFINXZDINX-NEXT: mv a5, a3
; RV32IZFINXZDINX-NEXT: beqz a6, .LBB3_3
-; RV32IZFINXZDINX-NEXT: # %bb.1:
+; RV32IZFINXZDINX-NEXT: .LBB3_1:
; RV32IZFINXZDINX-NEXT: feq.d a6, a2, a2
; RV32IZFINXZDINX-NEXT: beqz a6, .LBB3_4
-; RV32IZFINXZDINX-NEXT: .LBB3_2:
+; RV32IZFINXZDINX-NEXT: # %bb.2:
; RV32IZFINXZDINX-NEXT: fmin.d a0, a0, a4
; RV32IZFINXZDINX-NEXT: ret
; RV32IZFINXZDINX-NEXT: .LBB3_3:
; RV32IZFINXZDINX-NEXT: mv a4, a0
; RV32IZFINXZDINX-NEXT: mv a5, a1
-; RV32IZFINXZDINX-NEXT: feq.d a6, a2, a2
-; RV32IZFINXZDINX-NEXT: bnez a6, .LBB3_2
+; RV32IZFINXZDINX-NEXT: j .LBB3_1
; RV32IZFINXZDINX-NEXT: .LBB3_4:
; RV32IZFINXZDINX-NEXT: mv a0, a2
; RV32IZFINXZDINX-NEXT: mv a1, a3
@@ -213,16 +205,15 @@ define double @fmaximum_nnan_f64(double %a, double %b) nounwind {
; RV64IZFINXZDINX-NEXT: feq.d a3, a0, a0
; RV64IZFINXZDINX-NEXT: mv a2, a1
; RV64IZFINXZDINX-NEXT: beqz a3, .LBB3_3
-; RV64IZFINXZDINX-NEXT: # %bb.1:
+; RV64IZFINXZDINX-NEXT: .LBB3_1:
; RV64IZFINXZDINX-NEXT: feq.d a3, a1, a1
; RV64IZFINXZDINX-NEXT: beqz a3, .LBB3_4
-; RV64IZFINXZDINX-NEXT: .LBB3_2:
+; RV64IZFINXZDINX-NEXT: # %bb.2:
; RV64IZFINXZDINX-NEXT: fmin.d a0, a0, a2
; RV64IZFINXZDINX-NEXT: ret
; RV64IZFINXZDINX-NEXT: .LBB3_3:
; RV64IZFINXZDINX-NEXT: mv a2, a0
-; RV64IZFINXZDINX-NEXT: feq.d a3, a1, a1
-; RV64IZFINXZDINX-NEXT: bnez a3, .LBB3_2
+; RV64IZFINXZDINX-NEXT: j .LBB3_1
; RV64IZFINXZDINX-NEXT: .LBB3_4:
; RV64IZFINXZDINX-NEXT: fmin.d a0, a1, a2
; RV64IZFINXZDINX-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/float-br-fcmp.ll b/llvm/test/CodeGen/RISCV/float-br-fcmp.ll
index b2892115cac7a..c4f23f251c535 100644
--- a/llvm/test/CodeGen/RISCV/float-br-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/float-br-fcmp.ll
@@ -15,40 +15,24 @@ declare float @dummy(float)
define void @br_fcmp_false(float %a, float %b) nounwind {
; RV32IF-LABEL: br_fcmp_false:
; RV32IF: # %bb.0:
-; RV32IF-NEXT: j .LBB0_2
-; RV32IF-NEXT: # %bb.1: # %if.then
-; RV32IF-NEXT: ret
-; RV32IF-NEXT: .LBB0_2: # %if.else
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: call abort
;
; RV64IF-LABEL: br_fcmp_false:
; RV64IF: # %bb.0:
-; RV64IF-NEXT: j .LBB0_2
-; RV64IF-NEXT: # %bb.1: # %if.then
-; RV64IF-NEXT: ret
-; RV64IF-NEXT: .LBB0_2: # %if.else
; RV64IF-NEXT: addi sp, sp, -16
; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IF-NEXT: call abort
;
; RV32IZFINX-LABEL: br_fcmp_false:
; RV32IZFINX: # %bb.0:
-; RV32IZFINX-NEXT: j .LBB0_2
-; RV32IZFINX-NEXT: # %bb.1: # %if.then
-; RV32IZFINX-NEXT: ret
-; RV32IZFINX-NEXT: .LBB0_2: # %if.else
; RV32IZFINX-NEXT: addi sp, sp, -16
; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: call abort
;
; RV64IZFINX-LABEL: br_fcmp_false:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: j .LBB0_2
-; RV64IZFINX-NEXT: # %bb.1: # %if.then
-; RV64IZFINX-NEXT: ret
-; RV64IZFINX-NEXT: .LBB0_2: # %if.else
; RV64IZFINX-NEXT: addi sp, sp, -16
; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFINX-NEXT: call abort
@@ -894,40 +878,24 @@ if.then:
define void @br_fcmp_true(float %a, float %b) nounwind {
; RV32IF-LABEL: br_fcmp_true:
; RV32IF: # %bb.0:
-; RV32IF-NEXT: j .LBB16_2
-; RV32IF-NEXT: # %bb.1: # %if.else
-; RV32IF-NEXT: ret
-; RV32IF-NEXT: .LBB16_2: # %if.then
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: call abort
;
; RV64IF-LABEL: br_fcmp_true:
; RV64IF: # %bb.0:
-; RV64IF-NEXT: j .LBB16_2
-; RV64IF-NEXT: # %bb.1: # %if.else
-; RV64IF-NEXT: ret
-; RV64IF-NEXT: .LBB16_2: # %if.then
; RV64IF-NEXT: addi sp, sp, -16
; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IF-NEXT: call abort
;
; RV32IZFINX-LABEL: br_fcmp_true:
; RV32IZFINX: # %bb.0:
-; RV32IZFINX-NEXT: j .LBB16_2
-; RV32IZFINX-NEXT: # %bb.1: # %if.else
-; RV32IZFINX-NEXT: ret
-; RV32IZFINX-NEXT: .LBB16_2: # %if.then
; RV32IZFINX-NEXT: addi sp, sp, -16
; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: call abort
;
; RV64IZFINX-LABEL: br_fcmp_true:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: j .LBB16_2
-; RV64IZFINX-NEXT: # %bb.1: # %if.else
-; RV64IZFINX-NEXT: ret
-; RV64IZFINX-NEXT: .LBB16_2: # %if.then
; RV64IZFINX-NEXT: addi sp, sp, -16
; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFINX-NEXT: call abort
diff --git a/llvm/test/CodeGen/RISCV/float-maximum-minimum.ll b/llvm/test/CodeGen/RISCV/float-maximum-minimum.ll
index 2e9f8cbf6d2ef..f9320fd14ff08 100644
--- a/llvm/test/CodeGen/RISCV/float-maximum-minimum.ll
+++ b/llvm/test/CodeGen/RISCV/float-maximum-minimum.ll
@@ -32,16 +32,15 @@ define float @fminimum_f32(float %a, float %b) nounwind {
; RV32IF-NEXT: feq.s a0, fa0, fa0
; RV32IF-NEXT: fmv.s fa5, fa1
; RV32IF-NEXT: beqz a0, .LBB0_3
-; RV32IF-NEXT: # %bb.1:
+; RV32IF-NEXT: .LBB0_1:
; RV32IF-NEXT: feq.s a0, fa1, fa1
; RV32IF-NEXT: beqz a0, .LBB0_4
-; RV32IF-NEXT: .LBB0_2:
+; RV32IF-NEXT: # %bb.2:
; RV32IF-NEXT: fmin.s fa0, fa0, fa5
; RV32IF-NEXT: ret
; RV32IF-NEXT: .LBB0_3:
; RV32IF-NEXT: fmv.s fa5, fa0
-; RV32IF-NEXT: feq.s a0, fa1, fa1
-; RV32IF-NEXT: bnez a0, .LBB0_2
+; RV32IF-NEXT: j .LBB0_1
; RV32IF-NEXT: .LBB0_4:
; RV32IF-NEXT: fmin.s fa0, fa1, fa5
; RV32IF-NEXT: ret
@@ -51,16 +50,15 @@ define float @fminimum_f32(float %a, float %b) nounwind {
; RV32IZFINX-NEXT: feq.s a3, a0, a0
; RV32IZFINX-NEXT: mv a2, a1
; RV32IZFINX-NEXT: beqz a3, .LBB0_3
-; RV32IZFINX-NEXT: # %bb.1:
+; RV32IZFINX-NEXT: .LBB0_1:
; RV32IZFINX-NEXT: feq.s a3, a1, a1
; RV32IZFINX-NEXT: beqz a3, .LBB0_4
-; RV32IZFINX-NEXT: .LBB0_2:
+; RV32IZFINX-NEXT: # %bb.2:
; RV32IZFINX-NEXT: fmin.s a0, a0, a2
; RV32IZFINX-NEXT: ret
; RV32IZFINX-NEXT: .LBB0_3:
; RV32IZFINX-NEXT: mv a2, a0
-; RV32IZFINX-NEXT: feq.s a3, a1, a1
-; RV32IZFINX-NEXT: bnez a3, .LBB0_2
+; RV32IZFINX-NEXT: j .LBB0_1
; RV32IZFINX-NEXT: .LBB0_4:
; RV32IZFINX-NEXT: fmin.s a0, a1, a2
; RV32IZFINX-NEXT: ret
@@ -79,16 +77,15 @@ define float @fminimum_f32(float %a, float %b) nounwind {
; RV64IF-NEXT: feq.s a0, fa0, fa0
; RV64IF-NEXT: fmv.s fa5, fa1
; RV64IF-NEXT: beqz a0, .LBB0_3
-; RV64IF-NEXT: # %bb.1:
+; RV64IF-NEXT: .LBB0_1:
; RV64IF-NEXT: feq.s a0, fa1, fa1
; RV64IF-NEXT: beqz a0, .LBB0_4
-; RV64IF-NEXT: .LBB0_2:
+; RV64IF-NEXT: # %bb.2:
; RV64IF-NEXT: fmin.s fa0, fa0, fa5
; RV64IF-NEXT: ret
; RV64IF-NEXT: .LBB0_3:
; RV64IF-NEXT: fmv.s fa5, fa0
-; RV64IF-NEXT: feq.s a0, fa1, fa1
-; RV64IF-NEXT: bnez a0, .LBB0_2
+; RV64IF-NEXT: j .LBB0_1
; RV64IF-NEXT: .LBB0_4:
; RV64IF-NEXT: fmin.s fa0, fa1, fa5
; RV64IF-NEXT: ret
@@ -98,16 +95,15 @@ define float @fminimum_f32(float %a, float %b) nounwind {
; RV64IZFINX-NEXT: feq.s a3, a0, a0
; RV64IZFINX-NEXT: mv a2, a1
; RV64IZFINX-NEXT: beqz a3, .LBB0_3
-; RV64IZFINX-NEXT: # %bb.1:
+; RV64IZFINX-NEXT: .LBB0_1:
; RV64IZFINX-NEXT: feq.s a3, a1, a1
; RV64IZFINX-NEXT: beqz a3, .LBB0_4
-; RV64IZFINX-NEXT: .LBB0_2:
+; RV64IZFINX-NEXT: # %bb.2:
; RV64IZFINX-NEXT: fmin.s a0, a0, a2
; RV64IZFINX-NEXT: ret
; RV64IZFINX-NEXT: .LBB0_3:
; RV64IZFINX-NEXT: mv a2, a0
-; RV64IZFINX-NEXT: feq.s a3, a1, a1
-; RV64IZFINX-NEXT: bnez a3, .LBB0_2
+; RV64IZFINX-NEXT: j .LBB0_1
; RV64IZFINX-NEXT: .LBB0_4:
; RV64IZFINX-NEXT: fmin.s a0, a1, a2
; RV64IZFINX-NEXT: ret
@@ -132,16 +128,15 @@ define float @fmaximum_f32(float %a, float %b) nounwind {
; RV32IF-NEXT: feq.s a0, fa0, fa0
; RV32IF-NEXT: fmv.s fa5, fa1
; RV32IF-NEXT: beqz a0, .LBB1_3
-; RV32IF-NEXT: # %bb.1:
+; RV32IF-NEXT: .LBB1_1:
; RV32IF-NEXT: feq.s a0, fa1, fa1
; RV32IF-NEXT: beqz a0, .LBB1_4
-; RV32IF-NEXT: .LBB1_2:
+; RV32IF-NEXT: # %bb.2:
; RV32IF-NEXT: fmax.s fa0, fa0, fa5
; RV32IF-NEXT: ret
; RV32IF-NEXT: .LBB1_3:
; RV32IF-NEXT: fmv.s fa5, fa0
-; RV32IF-NEXT: feq.s a0, fa1, fa1
-; RV32IF-NEXT: bnez a0, .LBB1_2
+; RV32IF-NEXT: j .LBB1_1
; RV32IF-NEXT: .LBB1_4:
; RV32IF-NEXT: fmax.s fa0, fa1, fa5
; RV32IF-NEXT: ret
@@ -151,16 +146,15 @@ define float @fmaximum_f32(float %a, float %b) nounwind {
; RV32IZFINX-NEXT: feq.s a3, a0, a0
; RV32IZFINX-NEXT: mv a2, a1
; RV32IZFINX-NEXT: beqz a3, .LBB1_3
-; RV32IZFINX-NEXT: # %bb.1:
+; RV32IZFINX-NEXT: .LBB1_1:
; RV32IZFINX-NEXT: feq.s a3, a1, a1
; RV32IZFINX-NEXT: beqz a3, .LBB1_4
-; RV32IZFINX-NEXT: .LBB1_2:
+; RV32IZFINX-NEXT: # %bb.2:
; RV32IZFINX-NEXT: fmax.s a0, a0, a2
; RV32IZFINX-NEXT: ret
; RV32IZFINX-NEXT: .LBB1_3:
; RV32IZFINX-NEXT: mv a2, a0
-; RV32IZFINX-NEXT: feq.s a3, a1, a1
-; RV32IZFINX-NEXT: bnez a3, .LBB1_2
+; RV32IZFINX-NEXT: j .LBB1_1
; RV32IZFINX-NEXT: .LBB1_4:
; RV32IZFINX-NEXT: fmax.s a0, a1, a2
; RV32IZFINX-NEXT: ret
@@ -179,16 +173,15 @@ define float @fmaximum_f32(float %a, float %b) nounwind {
; RV64IF-NEXT: feq.s a0, fa0, fa0
; RV64IF-NEXT: fmv.s fa5, fa1
; RV64IF-NEXT: beqz a0, .LBB1_3
-; RV64IF-NEXT: # %bb.1:
+; RV64IF-NEXT: .LBB1_1:
; RV64IF-NEXT: feq.s a0, fa1, fa1
; RV64IF-NEXT: beqz a0, .LBB1_4
-; RV64IF-NEXT: .LBB1_2:
+; RV64IF-NEXT: # %bb.2:
; RV64IF-NEXT: fmax.s fa0, fa0, fa5
; RV64IF-NEXT: ret
; RV64IF-NEXT: .LBB1_3:
; RV64IF-NEXT: fmv.s fa5, fa0
-; RV64IF-NEXT: feq.s a0, fa1, fa1
-; RV64IF-NEXT: bnez a0, .LBB1_2
+; RV64IF-NEXT: j .LBB1_1
; RV64IF-NEXT: .LBB1_4:
; RV64IF-NEXT: fmax.s fa0, fa1, fa5
; RV64IF-NEXT: ret
@@ -198,16 +191,15 @@ define float @fmaximum_f32(float %a, float %b) nounwind {
; RV64IZFINX-NEXT: feq.s a3, a0, a0
; RV64IZFINX-NEXT: mv a2, a1
; RV64IZFINX-NEXT: beqz a3, .LBB1_3
-; RV64IZFINX-NEXT: # %bb.1:
+; RV64IZFINX-NEXT: .LBB1_1:
; RV64IZFINX-NEXT: feq.s a3, a1, a1
; RV64IZFINX-NEXT: beqz a3, .LBB1_4
-; RV64IZFINX-NEXT: .LBB1_2:
+; RV64IZFINX-NEXT: # %bb.2:
; RV64IZFINX-NEXT: fmax.s a0, a0, a2
; RV64IZFINX-NEXT: ret
; RV64IZFINX-NEXT: .LBB1_3:
; RV64IZFINX-NEXT: mv a2, a0
-; RV64IZFINX-NEXT: feq.s a3, a1, a1
-; RV64IZFINX-NEXT: bnez a3, .LBB1_2
+; RV64IZFINX-NEXT: j .LBB1_1
; RV64IZFINX-NEXT: .LBB1_4:
; RV64IZFINX-NEXT: fmax.s a0, a1, a2
; RV64IZFINX-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll
index e7719dc70660b..f105708e4e77b 100644
--- a/llvm/test/CodeGen/RISCV/forced-atomics.ll
+++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll
@@ -3367,19 +3367,15 @@ define i64 @rmw64_max_seq_cst(ptr %p) nounwind {
; RV32-NEXT: bnez a0, .LBB49_6
; RV32-NEXT: .LBB49_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32-NEXT: beqz a1, .LBB49_4
+; RV32-NEXT: beqz a1, .LBB49_7
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1
; RV32-NEXT: sgtz a0, a1
+; RV32-NEXT: .LBB49_4: # %atomicrmw.start
+; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1
; RV32-NEXT: mv a2, a4
; RV32-NEXT: bnez a0, .LBB49_1
-; RV32-NEXT: j .LBB49_5
-; RV32-NEXT: .LBB49_4: # in Loop: Header=BB49_2 Depth=1
-; RV32-NEXT: sltiu a0, a4, 2
-; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: mv a2, a4
-; RV32-NEXT: bnez a0, .LBB49_1
-; RV32-NEXT: .LBB49_5: # %atomicrmw.start
+; RV32-NEXT: # %bb.5: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1
; RV32-NEXT: li a2, 1
; RV32-NEXT: j .LBB49_1
@@ -3389,6 +3385,10 @@ define i64 @rmw64_max_seq_cst(ptr %p) nounwind {
; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
+; RV32-NEXT: .LBB49_7: # in Loop: Header=BB49_2 Depth=1
+; RV32-NEXT: sltiu a0, a4, 2
+; RV32-NEXT: xori a0, a0, 1
+; RV32-NEXT: j .LBB49_4
;
; RV64-NO-ATOMIC-LABEL: rmw64_max_seq_cst:
; RV64-NO-ATOMIC: # %bb.0:
@@ -3469,25 +3469,24 @@ define i64 @rmw64_min_seq_cst(ptr %p) nounwind {
; RV32-NEXT: call __atomic_compare_exchange_8
; RV32-NEXT: lw a4, 0(sp)
; RV32-NEXT: lw a1, 4(sp)
-; RV32-NEXT: bnez a0, .LBB50_6
+; RV32-NEXT: bnez a0, .LBB50_7
; RV32-NEXT: .LBB50_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beqz a1, .LBB50_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1
; RV32-NEXT: slti a0, a1, 0
-; RV32-NEXT: mv a2, a4
-; RV32-NEXT: bnez a0, .LBB50_1
; RV32-NEXT: j .LBB50_5
; RV32-NEXT: .LBB50_4: # in Loop: Header=BB50_2 Depth=1
; RV32-NEXT: sltiu a0, a4, 2
+; RV32-NEXT: .LBB50_5: # in Loop: Header=BB50_2 Depth=1
; RV32-NEXT: mv a2, a4
; RV32-NEXT: bnez a0, .LBB50_1
-; RV32-NEXT: .LBB50_5: # %atomicrmw.start
+; RV32-NEXT: # %bb.6: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1
; RV32-NEXT: li a2, 1
; RV32-NEXT: j .LBB50_1
-; RV32-NEXT: .LBB50_6: # %atomicrmw.end
+; RV32-NEXT: .LBB50_7: # %atomicrmw.end
; RV32-NEXT: mv a0, a4
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index c5c3b199447a9..6ee3bd5349ce7 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -607,15 +607,14 @@ define i16 @stest_f64i16(double %x) {
; RV32IFD-NEXT: lui a1, 8
; RV32IFD-NEXT: addi a1, a1, -1
; RV32IFD-NEXT: bge a0, a1, .LBB9_3
-; RV32IFD-NEXT: # %bb.1: # %entry
+; RV32IFD-NEXT: .LBB9_1: # %entry
; RV32IFD-NEXT: lui a1, 1048568
; RV32IFD-NEXT: bge a1, a0, .LBB9_4
-; RV32IFD-NEXT: .LBB9_2: # %entry
+; RV32IFD-NEXT: # %bb.2: # %entry
; RV32IFD-NEXT: ret
; RV32IFD-NEXT: .LBB9_3: # %entry
; RV32IFD-NEXT: mv a0, a1
-; RV32IFD-NEXT: lui a1, 1048568
-; RV32IFD-NEXT: blt a1, a0, .LBB9_2
+; RV32IFD-NEXT: j .LBB9_1
; RV32IFD-NEXT: .LBB9_4: # %entry
; RV32IFD-NEXT: lui a0, 1048568
; RV32IFD-NEXT: ret
@@ -626,15 +625,14 @@ define i16 @stest_f64i16(double %x) {
; RV64IFD-NEXT: lui a1, 8
; RV64IFD-NEXT: addiw a1, a1, -1
; RV64IFD-NEXT: bge a0, a1, .LBB9_3
-; RV64IFD-NEXT: # %bb.1: # %entry
+; RV64IFD-NEXT: .LBB9_1: # %entry
; RV64IFD-NEXT: lui a1, 1048568
; RV64IFD-NEXT: bge a1, a0, .LBB9_4
-; RV64IFD-NEXT: .LBB9_2: # %entry
+; RV64IFD-NEXT: # %bb.2: # %entry
; RV64IFD-NEXT: ret
; RV64IFD-NEXT: .LBB9_3: # %entry
; RV64IFD-NEXT: mv a0, a1
-; RV64IFD-NEXT: lui a1, 1048568
-; RV64IFD-NEXT: blt a1, a0, .LBB9_2
+; RV64IFD-NEXT: j .LBB9_1
; RV64IFD-NEXT: .LBB9_4: # %entry
; RV64IFD-NEXT: lui a0, 1048568
; RV64IFD-NEXT: ret
@@ -805,15 +803,14 @@ define i16 @stest_f32i16(float %x) {
; RV32-NEXT: lui a1, 8
; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: bge a0, a1, .LBB12_3
-; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: .LBB12_1: # %entry
; RV32-NEXT: lui a1, 1048568
; RV32-NEXT: bge a1, a0, .LBB12_4
-; RV32-NEXT: .LBB12_2: # %entry
+; RV32-NEXT: # %bb.2: # %entry
; RV32-NEXT: ret
; RV32-NEXT: .LBB12_3: # %entry
; RV32-NEXT: mv a0, a1
-; RV32-NEXT: lui a1, 1048568
-; RV32-NEXT: blt a1, a0, .LBB12_2
+; RV32-NEXT: j .LBB12_1
; RV32-NEXT: .LBB12_4: # %entry
; RV32-NEXT: lui a0, 1048568
; RV32-NEXT: ret
@@ -824,15 +821,14 @@ define i16 @stest_f32i16(float %x) {
; RV64-NEXT: lui a1, 8
; RV64-NEXT: addiw a1, a1, -1
; RV64-NEXT: bge a0, a1, .LBB12_3
-; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: .LBB12_1: # %entry
; RV64-NEXT: lui a1, 1048568
; RV64-NEXT: bge a1, a0, .LBB12_4
-; RV64-NEXT: .LBB12_2: # %entry
+; RV64-NEXT: # %bb.2: # %entry
; RV64-NEXT: ret
; RV64-NEXT: .LBB12_3: # %entry
; RV64-NEXT: mv a0, a1
-; RV64-NEXT: lui a1, 1048568
-; RV64-NEXT: blt a1, a0, .LBB12_2
+; RV64-NEXT: j .LBB12_1
; RV64-NEXT: .LBB12_4: # %entry
; RV64-NEXT: lui a0, 1048568
; RV64-NEXT: ret
@@ -1100,43 +1096,42 @@ define i64 @stest_f64i64(double %x) {
; RV32IF-NEXT: beq a1, a5, .LBB18_2
; RV32IF-NEXT: # %bb.1: # %entry
; RV32IF-NEXT: sltu a6, a1, a5
-; RV32IF-NEXT: or a7, a2, a4
-; RV32IF-NEXT: bnez a7, .LBB18_3
-; RV32IF-NEXT: j .LBB18_4
+; RV32IF-NEXT: j .LBB18_3
; RV32IF-NEXT: .LBB18_2:
; RV32IF-NEXT: sltiu a6, a3, -1
+; RV32IF-NEXT: .LBB18_3:
; RV32IF-NEXT: or a7, a2, a4
-; RV32IF-NEXT: beqz a7, .LBB18_4
-; RV32IF-NEXT: .LBB18_3: # %entry
+; RV32IF-NEXT: beqz a7, .LBB18_5
+; RV32IF-NEXT: # %bb.4: # %entry
; RV32IF-NEXT: slti a6, a4, 0
-; RV32IF-NEXT: .LBB18_4: # %entry
+; RV32IF-NEXT: .LBB18_5: # %entry
; RV32IF-NEXT: addi a7, a6, -1
; RV32IF-NEXT: neg t0, a6
-; RV32IF-NEXT: bnez a6, .LBB18_6
-; RV32IF-NEXT: # %bb.5: # %entry
+; RV32IF-NEXT: bnez a6, .LBB18_7
+; RV32IF-NEXT: # %bb.6: # %entry
; RV32IF-NEXT: mv a1, a5
-; RV32IF-NEXT: .LBB18_6: # %entry
+; RV32IF-NEXT: .LBB18_7: # %entry
; RV32IF-NEXT: or a3, a7, a3
; RV32IF-NEXT: and a4, t0, a4
; RV32IF-NEXT: and a2, t0, a2
-; RV32IF-NEXT: beq a1, a0, .LBB18_8
-; RV32IF-NEXT: # %bb.7: # %entry
+; RV32IF-NEXT: beq a1, a0, .LBB18_9
+; RV32IF-NEXT: # %bb.8: # %entry
; RV32IF-NEXT: sltu a0, a0, a1
-; RV32IF-NEXT: j .LBB18_9
-; RV32IF-NEXT: .LBB18_8:
+; RV32IF-NEXT: j .LBB18_10
+; RV32IF-NEXT: .LBB18_9:
; RV32IF-NEXT: snez a0, a3
-; RV32IF-NEXT: .LBB18_9: # %entry
+; RV32IF-NEXT: .LBB18_10: # %entry
; RV32IF-NEXT: and a2, a2, a4
; RV32IF-NEXT: li a5, -1
-; RV32IF-NEXT: beq a2, a5, .LBB18_11
-; RV32IF-NEXT: # %bb.10: # %entry
+; RV32IF-NEXT: beq a2, a5, .LBB18_12
+; RV32IF-NEXT: # %bb.11: # %entry
; RV32IF-NEXT: slti a0, a4, 0
; RV32IF-NEXT: xori a0, a0, 1
-; RV32IF-NEXT: .LBB18_11: # %entry
-; RV32IF-NEXT: bnez a0, .LBB18_13
-; RV32IF-NEXT: # %bb.12: # %entry
+; RV32IF-NEXT: .LBB18_12: # %entry
+; RV32IF-NEXT: bnez a0, .LBB18_14
+; RV32IF-NEXT: # %bb.13: # %entry
; RV32IF-NEXT: lui a1, 524288
-; RV32IF-NEXT: .LBB18_13: # %entry
+; RV32IF-NEXT: .LBB18_14: # %entry
; RV32IF-NEXT: neg a0, a0
; RV32IF-NEXT: and a0, a0, a3
; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -1203,43 +1198,42 @@ define i64 @stest_f64i64(double %x) {
; RV32IFD-NEXT: beq a1, a5, .LBB18_2
; RV32IFD-NEXT: # %bb.1: # %entry
; RV32IFD-NEXT: sltu a6, a1, a5
-; RV32IFD-NEXT: or a7, a2, a4
-; RV32IFD-NEXT: bnez a7, .LBB18_3
-; RV32IFD-NEXT: j .LBB18_4
+; RV32IFD-NEXT: j .LBB18_3
; RV32IFD-NEXT: .LBB18_2:
; RV32IFD-NEXT: sltiu a6, a3, -1
+; RV32IFD-NEXT: .LBB18_3:
; RV32IFD-NEXT: or a7, a2, a4
-; RV32IFD-NEXT: beqz a7, .LBB18_4
-; RV32IFD-NEXT: .LBB18_3: # %entry
+; RV32IFD-NEXT: beqz a7, .LBB18_5
+; RV32IFD-NEXT: # %bb.4: # %entry
; RV32IFD-NEXT: slti a6, a4, 0
-; RV32IFD-NEXT: .LBB18_4: # %entry
+; RV32IFD-NEXT: .LBB18_5: # %entry
; RV32IFD-NEXT: addi a7, a6, -1
; RV32IFD-NEXT: neg t0, a6
-; RV32IFD-NEXT: bnez a6, .LBB18_6
-; RV32IFD-NEXT: # %bb.5: # %entry
+; RV32IFD-NEXT: bnez a6, .LBB18_7
+; RV32IFD-NEXT: # %bb.6: # %entry
; RV32IFD-NEXT: mv a1, a5
-; RV32IFD-NEXT: .LBB18_6: # %entry
+; RV32IFD-NEXT: .LBB18_7: # %entry
; RV32IFD-NEXT: or a3, a7, a3
; RV32IFD-NEXT: and a4, t0, a4
; RV32IFD-NEXT: and a2, t0, a2
-; RV32IFD-NEXT: beq a1, a0, .LBB18_8
-; RV32IFD-NEXT: # %bb.7: # %entry
+; RV32IFD-NEXT: beq a1, a0, .LBB18_9
+; RV32IFD-NEXT: # %bb.8: # %entry
; RV32IFD-NEXT: sltu a0, a0, a1
-; RV32IFD-NEXT: j .LBB18_9
-; RV32IFD-NEXT: .LBB18_8:
+; RV32IFD-NEXT: j .LBB18_10
+; RV32IFD-NEXT: .LBB18_9:
; RV32IFD-NEXT: snez a0, a3
-; RV32IFD-NEXT: .LBB18_9: # %entry
+; RV32IFD-NEXT: .LBB18_10: # %entry
; RV32IFD-NEXT: and a2, a2, a4
; RV32IFD-NEXT: li a5, -1
-; RV32IFD-NEXT: beq a2, a5, .LBB18_11
-; RV32IFD-NEXT: # %bb.10: # %entry
+; RV32IFD-NEXT: beq a2, a5, .LBB18_12
+; RV32IFD-NEXT: # %bb.11: # %entry
; RV32IFD-NEXT: slti a0, a4, 0
; RV32IFD-NEXT: xori a0, a0, 1
-; RV32IFD-NEXT: .LBB18_11: # %entry
-; RV32IFD-NEXT: bnez a0, .LBB18_13
-; RV32IFD-NEXT: # %bb.12: # %entry
+; RV32IFD-NEXT: .LBB18_12: # %entry
+; RV32IFD-NEXT: bnez a0, .LBB18_14
+; RV32IFD-NEXT: # %bb.13: # %entry
; RV32IFD-NEXT: lui a1, 524288
-; RV32IFD-NEXT: .LBB18_13: # %entry
+; RV32IFD-NEXT: .LBB18_14: # %entry
; RV32IFD-NEXT: neg a0, a0
; RV32IFD-NEXT: and a0, a0, a3
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -1515,43 +1509,42 @@ define i64 @stest_f32i64(float %x) {
; RV32-NEXT: beq a1, a5, .LBB21_2
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: or a7, a2, a4
-; RV32-NEXT: bnez a7, .LBB21_3
-; RV32-NEXT: j .LBB21_4
+; RV32-NEXT: j .LBB21_3
; RV32-NEXT: .LBB21_2:
; RV32-NEXT: sltiu a6, a3, -1
+; RV32-NEXT: .LBB21_3:
; RV32-NEXT: or a7, a2, a4
-; RV32-NEXT: beqz a7, .LBB21_4
-; RV32-NEXT: .LBB21_3: # %entry
+; RV32-NEXT: beqz a7, .LBB21_5
+; RV32-NEXT: # %bb.4: # %entry
; RV32-NEXT: slti a6, a4, 0
-; RV32-NEXT: .LBB21_4: # %entry
+; RV32-NEXT: .LBB21_5: # %entry
; RV32-NEXT: addi a7, a6, -1
; RV32-NEXT: neg t0, a6
-; RV32-NEXT: bnez a6, .LBB21_6
-; RV32-NEXT: # %bb.5: # %entry
+; RV32-NEXT: bnez a6, .LBB21_7
+; RV32-NEXT: # %bb.6: # %entry
; RV32-NEXT: mv a1, a5
-; RV32-NEXT: .LBB21_6: # %entry
+; RV32-NEXT: .LBB21_7: # %entry
; RV32-NEXT: or a3, a7, a3
; RV32-NEXT: and a4, t0, a4
; RV32-NEXT: and a2, t0, a2
-; RV32-NEXT: beq a1, a0, .LBB21_8
-; RV32-NEXT: # %bb.7: # %entry
+; RV32-NEXT: beq a1, a0, .LBB21_9
+; RV32-NEXT: # %bb.8: # %entry
; RV32-NEXT: sltu a0, a0, a1
-; RV32-NEXT: j .LBB21_9
-; RV32-NEXT: .LBB21_8:
+; RV32-NEXT: j .LBB21_10
+; RV32-NEXT: .LBB21_9:
; RV32-NEXT: snez a0, a3
-; RV32-NEXT: .LBB21_9: # %entry
+; RV32-NEXT: .LBB21_10: # %entry
; RV32-NEXT: and a2, a2, a4
; RV32-NEXT: li a5, -1
-; RV32-NEXT: beq a2, a5, .LBB21_11
-; RV32-NEXT: # %bb.10: # %entry
+; RV32-NEXT: beq a2, a5, .LBB21_12
+; RV32-NEXT: # %bb.11: # %entry
; RV32-NEXT: slti a0, a4, 0
; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: .LBB21_11: # %entry
-; RV32-NEXT: bnez a0, .LBB21_13
-; RV32-NEXT: # %bb.12: # %entry
+; RV32-NEXT: .LBB21_12: # %entry
+; RV32-NEXT: bnez a0, .LBB21_14
+; RV32-NEXT: # %bb.13: # %entry
; RV32-NEXT: lui a1, 524288
-; RV32-NEXT: .LBB21_13: # %entry
+; RV32-NEXT: .LBB21_14: # %entry
; RV32-NEXT: neg a0, a0
; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -1742,43 +1735,42 @@ define i64 @stest_f16i64(half %x) {
; RV32-NEXT: beq a1, a5, .LBB24_2
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: or a7, a2, a4
-; RV32-NEXT: bnez a7, .LBB24_3
-; RV32-NEXT: j .LBB24_4
+; RV32-NEXT: j .LBB24_3
; RV32-NEXT: .LBB24_2:
; RV32-NEXT: sltiu a6, a3, -1
+; RV32-NEXT: .LBB24_3:
; RV32-NEXT: or a7, a2, a4
-; RV32-NEXT: beqz a7, .LBB24_4
-; RV32-NEXT: .LBB24_3: # %entry
+; RV32-NEXT: beqz a7, .LBB24_5
+; RV32-NEXT: # %bb.4: # %entry
; RV32-NEXT: slti a6, a4, 0
-; RV32-NEXT: .LBB24_4: # %entry
+; RV32-NEXT: .LBB24_5: # %entry
; RV32-NEXT: addi a7, a6, -1
; RV32-NEXT: neg t0, a6
-; RV32-NEXT: bnez a6, .LBB24_6
-; RV32-NEXT: # %bb.5: # %entry
+; RV32-NEXT: bnez a6, .LBB24_7
+; RV32-NEXT: # %bb.6: # %entry
; RV32-NEXT: mv a1, a5
-; RV32-NEXT: .LBB24_6: # %entry
+; RV32-NEXT: .LBB24_7: # %entry
; RV32-NEXT: or a3, a7, a3
; RV32-NEXT: and a4, t0, a4
; RV32-NEXT: and a2, t0, a2
-; RV32-NEXT: beq a1, a0, .LBB24_8
-; RV32-NEXT: # %bb.7: # %entry
+; RV32-NEXT: beq a1, a0, .LBB24_9
+; RV32-NEXT: # %bb.8: # %entry
; RV32-NEXT: sltu a0, a0, a1
-; RV32-NEXT: j .LBB24_9
-; RV32-NEXT: .LBB24_8:
+; RV32-NEXT: j .LBB24_10
+; RV32-NEXT: .LBB24_9:
; RV32-NEXT: snez a0, a3
-; RV32-NEXT: .LBB24_9: # %entry
+; RV32-NEXT: .LBB24_10: # %entry
; RV32-NEXT: and a2, a2, a4
; RV32-NEXT: li a5, -1
-; RV32-NEXT: beq a2, a5, .LBB24_11
-; RV32-NEXT: # %bb.10: # %entry
+; RV32-NEXT: beq a2, a5, .LBB24_12
+; RV32-NEXT: # %bb.11: # %entry
; RV32-NEXT: slti a0, a4, 0
; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: .LBB24_11: # %entry
-; RV32-NEXT: bnez a0, .LBB24_13
-; RV32-NEXT: # %bb.12: # %entry
+; RV32-NEXT: .LBB24_12: # %entry
+; RV32-NEXT: bnez a0, .LBB24_14
+; RV32-NEXT: # %bb.13: # %entry
; RV32-NEXT: lui a1, 524288
-; RV32-NEXT: .LBB24_13: # %entry
+; RV32-NEXT: .LBB24_14: # %entry
; RV32-NEXT: neg a0, a0
; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -2558,15 +2550,14 @@ define i16 @stest_f64i16_mm(double %x) {
; RV32IFD-NEXT: lui a1, 8
; RV32IFD-NEXT: addi a1, a1, -1
; RV32IFD-NEXT: bge a0, a1, .LBB36_3
-; RV32IFD-NEXT: # %bb.1: # %entry
+; RV32IFD-NEXT: .LBB36_1: # %entry
; RV32IFD-NEXT: lui a1, 1048568
; RV32IFD-NEXT: bge a1, a0, .LBB36_4
-; RV32IFD-NEXT: .LBB36_2: # %entry
+; RV32IFD-NEXT: # %bb.2: # %entry
; RV32IFD-NEXT: ret
; RV32IFD-NEXT: .LBB36_3: # %entry
; RV32IFD-NEXT: mv a0, a1
-; RV32IFD-NEXT: lui a1, 1048568
-; RV32IFD-NEXT: blt a1, a0, .LBB36_2
+; RV32IFD-NEXT: j .LBB36_1
; RV32IFD-NEXT: .LBB36_4: # %entry
; RV32IFD-NEXT: lui a0, 1048568
; RV32IFD-NEXT: ret
@@ -2577,15 +2568,14 @@ define i16 @stest_f64i16_mm(double %x) {
; RV64IFD-NEXT: lui a1, 8
; RV64IFD-NEXT: addiw a1, a1, -1
; RV64IFD-NEXT: bge a0, a1, .LBB36_3
-; RV64IFD-NEXT: # %bb.1: # %entry
+; RV64IFD-NEXT: .LBB36_1: # %entry
; RV64IFD-NEXT: lui a1, 1048568
; RV64IFD-NEXT: bge a1, a0, .LBB36_4
-; RV64IFD-NEXT: .LBB36_2: # %entry
+; RV64IFD-NEXT: # %bb.2: # %entry
; RV64IFD-NEXT: ret
; RV64IFD-NEXT: .LBB36_3: # %entry
; RV64IFD-NEXT: mv a0, a1
-; RV64IFD-NEXT: lui a1, 1048568
-; RV64IFD-NEXT: blt a1, a0, .LBB36_2
+; RV64IFD-NEXT: j .LBB36_1
; RV64IFD-NEXT: .LBB36_4: # %entry
; RV64IFD-NEXT: lui a0, 1048568
; RV64IFD-NEXT: ret
@@ -2751,15 +2741,14 @@ define i16 @stest_f32i16_mm(float %x) {
; RV32-NEXT: lui a1, 8
; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: bge a0, a1, .LBB39_3
-; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: .LBB39_1: # %entry
; RV32-NEXT: lui a1, 1048568
; RV32-NEXT: bge a1, a0, .LBB39_4
-; RV32-NEXT: .LBB39_2: # %entry
+; RV32-NEXT: # %bb.2: # %entry
; RV32-NEXT: ret
; RV32-NEXT: .LBB39_3: # %entry
; RV32-NEXT: mv a0, a1
-; RV32-NEXT: lui a1, 1048568
-; RV32-NEXT: blt a1, a0, .LBB39_2
+; RV32-NEXT: j .LBB39_1
; RV32-NEXT: .LBB39_4: # %entry
; RV32-NEXT: lui a0, 1048568
; RV32-NEXT: ret
@@ -2770,15 +2759,14 @@ define i16 @stest_f32i16_mm(float %x) {
; RV64-NEXT: lui a1, 8
; RV64-NEXT: addiw a1, a1, -1
; RV64-NEXT: bge a0, a1, .LBB39_3
-; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: .LBB39_1: # %entry
; RV64-NEXT: lui a1, 1048568
; RV64-NEXT: bge a1, a0, .LBB39_4
-; RV64-NEXT: .LBB39_2: # %entry
+; RV64-NEXT: # %bb.2: # %entry
; RV64-NEXT: ret
; RV64-NEXT: .LBB39_3: # %entry
; RV64-NEXT: mv a0, a1
-; RV64-NEXT: lui a1, 1048568
-; RV64-NEXT: blt a1, a0, .LBB39_2
+; RV64-NEXT: j .LBB39_1
; RV64-NEXT: .LBB39_4: # %entry
; RV64-NEXT: lui a0, 1048568
; RV64-NEXT: ret
@@ -3036,43 +3024,42 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IF-NEXT: beq a1, a5, .LBB45_2
; RV32IF-NEXT: # %bb.1: # %entry
; RV32IF-NEXT: sltu a6, a1, a5
-; RV32IF-NEXT: or a7, a2, a4
-; RV32IF-NEXT: bnez a7, .LBB45_3
-; RV32IF-NEXT: j .LBB45_4
+; RV32IF-NEXT: j .LBB45_3
; RV32IF-NEXT: .LBB45_2:
; RV32IF-NEXT: sltiu a6, a3, -1
+; RV32IF-NEXT: .LBB45_3:
; RV32IF-NEXT: or a7, a2, a4
-; RV32IF-NEXT: beqz a7, .LBB45_4
-; RV32IF-NEXT: .LBB45_3: # %entry
+; RV32IF-NEXT: beqz a7, .LBB45_5
+; RV32IF-NEXT: # %bb.4: # %entry
; RV32IF-NEXT: slti a6, a4, 0
-; RV32IF-NEXT: .LBB45_4: # %entry
+; RV32IF-NEXT: .LBB45_5: # %entry
; RV32IF-NEXT: addi a7, a6, -1
; RV32IF-NEXT: neg t0, a6
-; RV32IF-NEXT: bnez a6, .LBB45_6
-; RV32IF-NEXT: # %bb.5: # %entry
+; RV32IF-NEXT: bnez a6, .LBB45_7
+; RV32IF-NEXT: # %bb.6: # %entry
; RV32IF-NEXT: mv a1, a5
-; RV32IF-NEXT: .LBB45_6: # %entry
+; RV32IF-NEXT: .LBB45_7: # %entry
; RV32IF-NEXT: or a3, a7, a3
; RV32IF-NEXT: and a4, t0, a4
; RV32IF-NEXT: and a2, t0, a2
-; RV32IF-NEXT: beq a1, a0, .LBB45_8
-; RV32IF-NEXT: # %bb.7: # %entry
+; RV32IF-NEXT: beq a1, a0, .LBB45_9
+; RV32IF-NEXT: # %bb.8: # %entry
; RV32IF-NEXT: sltu a0, a0, a1
-; RV32IF-NEXT: j .LBB45_9
-; RV32IF-NEXT: .LBB45_8:
+; RV32IF-NEXT: j .LBB45_10
+; RV32IF-NEXT: .LBB45_9:
; RV32IF-NEXT: snez a0, a3
-; RV32IF-NEXT: .LBB45_9: # %entry
+; RV32IF-NEXT: .LBB45_10: # %entry
; RV32IF-NEXT: and a2, a2, a4
; RV32IF-NEXT: li a5, -1
-; RV32IF-NEXT: beq a2, a5, .LBB45_11
-; RV32IF-NEXT: # %bb.10: # %entry
+; RV32IF-NEXT: beq a2, a5, .LBB45_12
+; RV32IF-NEXT: # %bb.11: # %entry
; RV32IF-NEXT: slti a0, a4, 0
; RV32IF-NEXT: xori a0, a0, 1
-; RV32IF-NEXT: .LBB45_11: # %entry
-; RV32IF-NEXT: bnez a0, .LBB45_13
-; RV32IF-NEXT: # %bb.12: # %entry
+; RV32IF-NEXT: .LBB45_12: # %entry
+; RV32IF-NEXT: bnez a0, .LBB45_14
+; RV32IF-NEXT: # %bb.13: # %entry
; RV32IF-NEXT: lui a1, 524288
-; RV32IF-NEXT: .LBB45_13: # %entry
+; RV32IF-NEXT: .LBB45_14: # %entry
; RV32IF-NEXT: neg a0, a0
; RV32IF-NEXT: and a0, a0, a3
; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -3139,43 +3126,42 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IFD-NEXT: beq a1, a5, .LBB45_2
; RV32IFD-NEXT: # %bb.1: # %entry
; RV32IFD-NEXT: sltu a6, a1, a5
-; RV32IFD-NEXT: or a7, a2, a4
-; RV32IFD-NEXT: bnez a7, .LBB45_3
-; RV32IFD-NEXT: j .LBB45_4
+; RV32IFD-NEXT: j .LBB45_3
; RV32IFD-NEXT: .LBB45_2:
; RV32IFD-NEXT: sltiu a6, a3, -1
+; RV32IFD-NEXT: .LBB45_3:
; RV32IFD-NEXT: or a7, a2, a4
-; RV32IFD-NEXT: beqz a7, .LBB45_4
-; RV32IFD-NEXT: .LBB45_3: # %entry
+; RV32IFD-NEXT: beqz a7, .LBB45_5
+; RV32IFD-NEXT: # %bb.4: # %entry
; RV32IFD-NEXT: slti a6, a4, 0
-; RV32IFD-NEXT: .LBB45_4: # %entry
+; RV32IFD-NEXT: .LBB45_5: # %entry
; RV32IFD-NEXT: addi a7, a6, -1
; RV32IFD-NEXT: neg t0, a6
-; RV32IFD-NEXT: bnez a6, .LBB45_6
-; RV32IFD-NEXT: # %bb.5: # %entry
+; RV32IFD-NEXT: bnez a6, .LBB45_7
+; RV32IFD-NEXT: # %bb.6: # %entry
; RV32IFD-NEXT: mv a1, a5
-; RV32IFD-NEXT: .LBB45_6: # %entry
+; RV32IFD-NEXT: .LBB45_7: # %entry
; RV32IFD-NEXT: or a3, a7, a3
; RV32IFD-NEXT: and a4, t0, a4
; RV32IFD-NEXT: and a2, t0, a2
-; RV32IFD-NEXT: beq a1, a0, .LBB45_8
-; RV32IFD-NEXT: # %bb.7: # %entry
+; RV32IFD-NEXT: beq a1, a0, .LBB45_9
+; RV32IFD-NEXT: # %bb.8: # %entry
; RV32IFD-NEXT: sltu a0, a0, a1
-; RV32IFD-NEXT: j .LBB45_9
-; RV32IFD-NEXT: .LBB45_8:
+; RV32IFD-NEXT: j .LBB45_10
+; RV32IFD-NEXT: .LBB45_9:
; RV32IFD-NEXT: snez a0, a3
-; RV32IFD-NEXT: .LBB45_9: # %entry
+; RV32IFD-NEXT: .LBB45_10: # %entry
; RV32IFD-NEXT: and a2, a2, a4
; RV32IFD-NEXT: li a5, -1
-; RV32IFD-NEXT: beq a2, a5, .LBB45_11
-; RV32IFD-NEXT: # %bb.10: # %entry
+; RV32IFD-NEXT: beq a2, a5, .LBB45_12
+; RV32IFD-NEXT: # %bb.11: # %entry
; RV32IFD-NEXT: slti a0, a4, 0
; RV32IFD-NEXT: xori a0, a0, 1
-; RV32IFD-NEXT: .LBB45_11: # %entry
-; RV32IFD-NEXT: bnez a0, .LBB45_13
-; RV32IFD-NEXT: # %bb.12: # %entry
+; RV32IFD-NEXT: .LBB45_12: # %entry
+; RV32IFD-NEXT: bnez a0, .LBB45_14
+; RV32IFD-NEXT: # %bb.13: # %entry
; RV32IFD-NEXT: lui a1, 524288
-; RV32IFD-NEXT: .LBB45_13: # %entry
+; RV32IFD-NEXT: .LBB45_14: # %entry
; RV32IFD-NEXT: neg a0, a0
; RV32IFD-NEXT: and a0, a0, a3
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -3409,43 +3395,42 @@ define i64 @stest_f32i64_mm(float %x) {
; RV32-NEXT: beq a1, a5, .LBB48_2
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: or a7, a2, a4
-; RV32-NEXT: bnez a7, .LBB48_3
-; RV32-NEXT: j .LBB48_4
+; RV32-NEXT: j .LBB48_3
; RV32-NEXT: .LBB48_2:
; RV32-NEXT: sltiu a6, a3, -1
+; RV32-NEXT: .LBB48_3:
; RV32-NEXT: or a7, a2, a4
-; RV32-NEXT: beqz a7, .LBB48_4
-; RV32-NEXT: .LBB48_3: # %entry
+; RV32-NEXT: beqz a7, .LBB48_5
+; RV32-NEXT: # %bb.4: # %entry
; RV32-NEXT: slti a6, a4, 0
-; RV32-NEXT: .LBB48_4: # %entry
+; RV32-NEXT: .LBB48_5: # %entry
; RV32-NEXT: addi a7, a6, -1
; RV32-NEXT: neg t0, a6
-; RV32-NEXT: bnez a6, .LBB48_6
-; RV32-NEXT: # %bb.5: # %entry
+; RV32-NEXT: bnez a6, .LBB48_7
+; RV32-NEXT: # %bb.6: # %entry
; RV32-NEXT: mv a1, a5
-; RV32-NEXT: .LBB48_6: # %entry
+; RV32-NEXT: .LBB48_7: # %entry
; RV32-NEXT: or a3, a7, a3
; RV32-NEXT: and a4, t0, a4
; RV32-NEXT: and a2, t0, a2
-; RV32-NEXT: beq a1, a0, .LBB48_8
-; RV32-NEXT: # %bb.7: # %entry
+; RV32-NEXT: beq a1, a0, .LBB48_9
+; RV32-NEXT: # %bb.8: # %entry
; RV32-NEXT: sltu a0, a0, a1
-; RV32-NEXT: j .LBB48_9
-; RV32-NEXT: .LBB48_8:
+; RV32-NEXT: j .LBB48_10
+; RV32-NEXT: .LBB48_9:
; RV32-NEXT: snez a0, a3
-; RV32-NEXT: .LBB48_9: # %entry
+; RV32-NEXT: .LBB48_10: # %entry
; RV32-NEXT: and a2, a2, a4
; RV32-NEXT: li a5, -1
-; RV32-NEXT: beq a2, a5, .LBB48_11
-; RV32-NEXT: # %bb.10: # %entry
+; RV32-NEXT: beq a2, a5, .LBB48_12
+; RV32-NEXT: # %bb.11: # %entry
; RV32-NEXT: slti a0, a4, 0
; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: .LBB48_11: # %entry
-; RV32-NEXT: bnez a0, .LBB48_13
-; RV32-NEXT: # %bb.12: # %entry
+; RV32-NEXT: .LBB48_12: # %entry
+; RV32-NEXT: bnez a0, .LBB48_14
+; RV32-NEXT: # %bb.13: # %entry
; RV32-NEXT: lui a1, 524288
-; RV32-NEXT: .LBB48_13: # %entry
+; RV32-NEXT: .LBB48_14: # %entry
; RV32-NEXT: neg a0, a0
; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -3610,43 +3595,42 @@ define i64 @stest_f16i64_mm(half %x) {
; RV32-NEXT: beq a1, a5, .LBB51_2
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: or a7, a2, a4
-; RV32-NEXT: bnez a7, .LBB51_3
-; RV32-NEXT: j .LBB51_4
+; RV32-NEXT: j .LBB51_3
; RV32-NEXT: .LBB51_2:
; RV32-NEXT: sltiu a6, a3, -1
+; RV32-NEXT: .LBB51_3:
; RV32-NEXT: or a7, a2, a4
-; RV32-NEXT: beqz a7, .LBB51_4
-; RV32-NEXT: .LBB51_3: # %entry
+; RV32-NEXT: beqz a7, .LBB51_5
+; RV32-NEXT: # %bb.4: # %entry
; RV32-NEXT: slti a6, a4, 0
-; RV32-NEXT: .LBB51_4: # %entry
+; RV32-NEXT: .LBB51_5: # %entry
; RV32-NEXT: addi a7, a6, -1
; RV32-NEXT: neg t0, a6
-; RV32-NEXT: bnez a6, .LBB51_6
-; RV32-NEXT: # %bb.5: # %entry
+; RV32-NEXT: bnez a6, .LBB51_7
+; RV32-NEXT: # %bb.6: # %entry
; RV32-NEXT: mv a1, a5
-; RV32-NEXT: .LBB51_6: # %entry
+; RV32-NEXT: .LBB51_7: # %entry
; RV32-NEXT: or a3, a7, a3
; RV32-NEXT: and a4, t0, a4
; RV32-NEXT: and a2, t0, a2
-; RV32-NEXT: beq a1, a0, .LBB51_8
-; RV32-NEXT: # %bb.7: # %entry
+; RV32-NEXT: beq a1, a0, .LBB51_9
+; RV32-NEXT: # %bb.8: # %entry
; RV32-NEXT: sltu a0, a0, a1
-; RV32-NEXT: j .LBB51_9
-; RV32-NEXT: .LBB51_8:
+; RV32-NEXT: j .LBB51_10
+; RV32-NEXT: .LBB51_9:
; RV32-NEXT: snez a0, a3
-; RV32-NEXT: .LBB51_9: # %entry
+; RV32-NEXT: .LBB51_10: # %entry
; RV32-NEXT: and a2, a2, a4
; RV32-NEXT: li a5, -1
-; RV32-NEXT: beq a2, a5, .LBB51_11
-; RV32-NEXT: # %bb.10: # %entry
+; RV32-NEXT: beq a2, a5, .LBB51_12
+; RV32-NEXT: # %bb.11: # %entry
; RV32-NEXT: slti a0, a4, 0
; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: .LBB51_11: # %entry
-; RV32-NEXT: bnez a0, .LBB51_13
-; RV32-NEXT: # %bb.12: # %entry
+; RV32-NEXT: .LBB51_12: # %entry
+; RV32-NEXT: bnez a0, .LBB51_14
+; RV32-NEXT: # %bb.13: # %entry
; RV32-NEXT: lui a1, 524288
-; RV32-NEXT: .LBB51_13: # %entry
+; RV32-NEXT: .LBB51_14: # %entry
; RV32-NEXT: neg a0, a0
; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/frame-info.ll b/llvm/test/CodeGen/RISCV/frame-info.ll
index 4979c9f75ef94..c36007651f199 100644
--- a/llvm/test/CodeGen/RISCV/frame-info.ll
+++ b/llvm/test/CodeGen/RISCV/frame-info.ll
@@ -330,10 +330,8 @@ define void @branch_and_tail_call(i1 %a) {
; RV32-LABEL: branch_and_tail_call:
; RV32: # %bb.0:
; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: beqz a0, .LBB2_2
-; RV32-NEXT: # %bb.1: # %blue_pill
-; RV32-NEXT: tail callee1
-; RV32-NEXT: .LBB2_2: # %red_pill
+; RV32-NEXT: bnez a0, .LBB2_2
+; RV32-NEXT: # %bb.1: # %red_pill
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
@@ -344,14 +342,14 @@ define void @branch_and_tail_call(i1 %a) {
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
+; RV32-NEXT: .LBB2_2: # %blue_pill
+; RV32-NEXT: tail callee1
;
; RV64-LABEL: branch_and_tail_call:
; RV64: # %bb.0:
; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: beqz a0, .LBB2_2
-; RV64-NEXT: # %bb.1: # %blue_pill
-; RV64-NEXT: tail callee1
-; RV64-NEXT: .LBB2_2: # %red_pill
+; RV64-NEXT: bnez a0, .LBB2_2
+; RV64-NEXT: # %bb.1: # %red_pill
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
@@ -362,14 +360,14 @@ define void @branch_and_tail_call(i1 %a) {
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
+; RV64-NEXT: .LBB2_2: # %blue_pill
+; RV64-NEXT: tail callee1
;
; RV32-WITHFP-LABEL: branch_and_tail_call:
; RV32-WITHFP: # %bb.0:
; RV32-WITHFP-NEXT: andi a0, a0, 1
-; RV32-WITHFP-NEXT: beqz a0, .LBB2_2
-; RV32-WITHFP-NEXT: # %bb.1: # %blue_pill
-; RV32-WITHFP-NEXT: tail callee1
-; RV32-WITHFP-NEXT: .LBB2_2: # %red_pill
+; RV32-WITHFP-NEXT: bnez a0, .LBB2_2
+; RV32-WITHFP-NEXT: # %bb.1: # %red_pill
; RV32-WITHFP-NEXT: addi sp, sp, -16
; RV32-WITHFP-NEXT: .cfi_def_cfa_offset 16
; RV32-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
@@ -387,14 +385,14 @@ define void @branch_and_tail_call(i1 %a) {
; RV32-WITHFP-NEXT: addi sp, sp, 16
; RV32-WITHFP-NEXT: .cfi_def_cfa_offset 0
; RV32-WITHFP-NEXT: ret
+; RV32-WITHFP-NEXT: .LBB2_2: # %blue_pill
+; RV32-WITHFP-NEXT: tail callee1
;
; RV64-WITHFP-LABEL: branch_and_tail_call:
; RV64-WITHFP: # %bb.0:
; RV64-WITHFP-NEXT: andi a0, a0, 1
-; RV64-WITHFP-NEXT: beqz a0, .LBB2_2
-; RV64-WITHFP-NEXT: # %bb.1: # %blue_pill
-; RV64-WITHFP-NEXT: tail callee1
-; RV64-WITHFP-NEXT: .LBB2_2: # %red_pill
+; RV64-WITHFP-NEXT: bnez a0, .LBB2_2
+; RV64-WITHFP-NEXT: # %bb.1: # %red_pill
; RV64-WITHFP-NEXT: addi sp, sp, -16
; RV64-WITHFP-NEXT: .cfi_def_cfa_offset 16
; RV64-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
@@ -412,6 +410,8 @@ define void @branch_and_tail_call(i1 %a) {
; RV64-WITHFP-NEXT: addi sp, sp, 16
; RV64-WITHFP-NEXT: .cfi_def_cfa_offset 0
; RV64-WITHFP-NEXT: ret
+; RV64-WITHFP-NEXT: .LBB2_2: # %blue_pill
+; RV64-WITHFP-NEXT: tail callee1
;
; RV32-DISABLESW-LABEL: branch_and_tail_call:
; RV32-DISABLESW: # %bb.0:
@@ -421,21 +421,21 @@ define void @branch_and_tail_call(i1 %a) {
; RV32-DISABLESW-NEXT: .cfi_offset ra, -4
; RV32-DISABLESW-NEXT: .cfi_remember_state
; RV32-DISABLESW-NEXT: andi a0, a0, 1
-; RV32-DISABLESW-NEXT: beqz a0, .LBB2_2
-; RV32-DISABLESW-NEXT: # %bb.1: # %blue_pill
+; RV32-DISABLESW-NEXT: bnez a0, .LBB2_2
+; RV32-DISABLESW-NEXT: # %bb.1: # %red_pill
+; RV32-DISABLESW-NEXT: call callee2
; RV32-DISABLESW-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-DISABLESW-NEXT: .cfi_restore ra
; RV32-DISABLESW-NEXT: addi sp, sp, 16
; RV32-DISABLESW-NEXT: .cfi_def_cfa_offset 0
-; RV32-DISABLESW-NEXT: tail callee1
-; RV32-DISABLESW-NEXT: .LBB2_2: # %red_pill
+; RV32-DISABLESW-NEXT: ret
+; RV32-DISABLESW-NEXT: .LBB2_2: # %blue_pill
; RV32-DISABLESW-NEXT: .cfi_restore_state
-; RV32-DISABLESW-NEXT: call callee2
; RV32-DISABLESW-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-DISABLESW-NEXT: .cfi_restore ra
; RV32-DISABLESW-NEXT: addi sp, sp, 16
; RV32-DISABLESW-NEXT: .cfi_def_cfa_offset 0
-; RV32-DISABLESW-NEXT: ret
+; RV32-DISABLESW-NEXT: tail callee1
;
; RV64-DISABLESW-LABEL: branch_and_tail_call:
; RV64-DISABLESW: # %bb.0:
@@ -445,21 +445,21 @@ define void @branch_and_tail_call(i1 %a) {
; RV64-DISABLESW-NEXT: .cfi_offset ra, -8
; RV64-DISABLESW-NEXT: .cfi_remember_state
; RV64-DISABLESW-NEXT: andi a0, a0, 1
-; RV64-DISABLESW-NEXT: beqz a0, .LBB2_2
-; RV64-DISABLESW-NEXT: # %bb.1: # %blue_pill
+; RV64-DISABLESW-NEXT: bnez a0, .LBB2_2
+; RV64-DISABLESW-NEXT: # %bb.1: # %red_pill
+; RV64-DISABLESW-NEXT: call callee2
; RV64-DISABLESW-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-DISABLESW-NEXT: .cfi_restore ra
; RV64-DISABLESW-NEXT: addi sp, sp, 16
; RV64-DISABLESW-NEXT: .cfi_def_cfa_offset 0
-; RV64-DISABLESW-NEXT: tail callee1
-; RV64-DISABLESW-NEXT: .LBB2_2: # %red_pill
+; RV64-DISABLESW-NEXT: ret
+; RV64-DISABLESW-NEXT: .LBB2_2: # %blue_pill
; RV64-DISABLESW-NEXT: .cfi_restore_state
-; RV64-DISABLESW-NEXT: call callee2
; RV64-DISABLESW-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-DISABLESW-NEXT: .cfi_restore ra
; RV64-DISABLESW-NEXT: addi sp, sp, 16
; RV64-DISABLESW-NEXT: .cfi_def_cfa_offset 0
-; RV64-DISABLESW-NEXT: ret
+; RV64-DISABLESW-NEXT: tail callee1
;
; RV32-WITHFP-DISABLESW-LABEL: branch_and_tail_call:
; RV32-WITHFP-DISABLESW: # %bb.0:
@@ -473,8 +473,9 @@ define void @branch_and_tail_call(i1 %a) {
; RV32-WITHFP-DISABLESW-NEXT: .cfi_def_cfa s0, 0
; RV32-WITHFP-DISABLESW-NEXT: .cfi_remember_state
; RV32-WITHFP-DISABLESW-NEXT: andi a0, a0, 1
-; RV32-WITHFP-DISABLESW-NEXT: beqz a0, .LBB2_2
-; RV32-WITHFP-DISABLESW-NEXT: # %bb.1: # %blue_pill
+; RV32-WITHFP-DISABLESW-NEXT: bnez a0, .LBB2_2
+; RV32-WITHFP-DISABLESW-NEXT: # %bb.1: # %red_pill
+; RV32-WITHFP-DISABLESW-NEXT: call callee2
; RV32-WITHFP-DISABLESW-NEXT: .cfi_def_cfa sp, 16
; RV32-WITHFP-DISABLESW-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-WITHFP-DISABLESW-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -482,10 +483,9 @@ define void @branch_and_tail_call(i1 %a) {
; RV32-WITHFP-DISABLESW-NEXT: .cfi_restore s0
; RV32-WITHFP-DISABLESW-NEXT: addi sp, sp, 16
; RV32-WITHFP-DISABLESW-NEXT: .cfi_def_cfa_offset 0
-; RV32-WITHFP-DISABLESW-NEXT: tail callee1
-; RV32-WITHFP-DISABLESW-NEXT: .LBB2_2: # %red_pill
+; RV32-WITHFP-DISABLESW-NEXT: ret
+; RV32-WITHFP-DISABLESW-NEXT: .LBB2_2: # %blue_pill
; RV32-WITHFP-DISABLESW-NEXT: .cfi_restore_state
-; RV32-WITHFP-DISABLESW-NEXT: call callee2
; RV32-WITHFP-DISABLESW-NEXT: .cfi_def_cfa sp, 16
; RV32-WITHFP-DISABLESW-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-WITHFP-DISABLESW-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -493,7 +493,7 @@ define void @branch_and_tail_call(i1 %a) {
; RV32-WITHFP-DISABLESW-NEXT: .cfi_restore s0
; RV32-WITHFP-DISABLESW-NEXT: addi sp, sp, 16
; RV32-WITHFP-DISABLESW-NEXT: .cfi_def_cfa_offset 0
-; RV32-WITHFP-DISABLESW-NEXT: ret
+; RV32-WITHFP-DISABLESW-NEXT: tail callee1
;
; RV64-WITHFP-DISABLESW-LABEL: branch_and_tail_call:
; RV64-WITHFP-DISABLESW: # %bb.0:
@@ -507,8 +507,9 @@ define void @branch_and_tail_call(i1 %a) {
; RV64-WITHFP-DISABLESW-NEXT: .cfi_def_cfa s0, 0
; RV64-WITHFP-DISABLESW-NEXT: .cfi_remember_state
; RV64-WITHFP-DISABLESW-NEXT: andi a0, a0, 1
-; RV64-WITHFP-DISABLESW-NEXT: beqz a0, .LBB2_2
-; RV64-WITHFP-DISABLESW-NEXT: # %bb.1: # %blue_pill
+; RV64-WITHFP-DISABLESW-NEXT: bnez a0, .LBB2_2
+; RV64-WITHFP-DISABLESW-NEXT: # %bb.1: # %red_pill
+; RV64-WITHFP-DISABLESW-NEXT: call callee2
; RV64-WITHFP-DISABLESW-NEXT: .cfi_def_cfa sp, 16
; RV64-WITHFP-DISABLESW-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-WITHFP-DISABLESW-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
@@ -516,10 +517,9 @@ define void @branch_and_tail_call(i1 %a) {
; RV64-WITHFP-DISABLESW-NEXT: .cfi_restore s0
; RV64-WITHFP-DISABLESW-NEXT: addi sp, sp, 16
; RV64-WITHFP-DISABLESW-NEXT: .cfi_def_cfa_offset 0
-; RV64-WITHFP-DISABLESW-NEXT: tail callee1
-; RV64-WITHFP-DISABLESW-NEXT: .LBB2_2: # %red_pill
+; RV64-WITHFP-DISABLESW-NEXT: ret
+; RV64-WITHFP-DISABLESW-NEXT: .LBB2_2: # %blue_pill
; RV64-WITHFP-DISABLESW-NEXT: .cfi_restore_state
-; RV64-WITHFP-DISABLESW-NEXT: call callee2
; RV64-WITHFP-DISABLESW-NEXT: .cfi_def_cfa sp, 16
; RV64-WITHFP-DISABLESW-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-WITHFP-DISABLESW-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
@@ -527,7 +527,7 @@ define void @branch_and_tail_call(i1 %a) {
; RV64-WITHFP-DISABLESW-NEXT: .cfi_restore s0
; RV64-WITHFP-DISABLESW-NEXT: addi sp, sp, 16
; RV64-WITHFP-DISABLESW-NEXT: .cfi_def_cfa_offset 0
-; RV64-WITHFP-DISABLESW-NEXT: ret
+; RV64-WITHFP-DISABLESW-NEXT: tail callee1
br i1 %a, label %blue_pill, label %red_pill
blue_pill:
tail call void @callee1()
diff --git a/llvm/test/CodeGen/RISCV/half-br-fcmp.ll b/llvm/test/CodeGen/RISCV/half-br-fcmp.ll
index ab8f7cd4e6bfd..ceb49747c543c 100644
--- a/llvm/test/CodeGen/RISCV/half-br-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/half-br-fcmp.ll
@@ -23,80 +23,48 @@ declare half @dummy(half)
define void @br_fcmp_false(half %a, half %b) nounwind {
; RV32IZFH-LABEL: br_fcmp_false:
; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: j .LBB0_2
-; RV32IZFH-NEXT: # %bb.1: # %if.then
-; RV32IZFH-NEXT: ret
-; RV32IZFH-NEXT: .LBB0_2: # %if.else
; RV32IZFH-NEXT: addi sp, sp, -16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: call abort
;
; RV64IZFH-LABEL: br_fcmp_false:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: j .LBB0_2
-; RV64IZFH-NEXT: # %bb.1: # %if.then
-; RV64IZFH-NEXT: ret
-; RV64IZFH-NEXT: .LBB0_2: # %if.else
; RV64IZFH-NEXT: addi sp, sp, -16
; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFH-NEXT: call abort
;
; RV32IZHINX-LABEL: br_fcmp_false:
; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: j .LBB0_2
-; RV32IZHINX-NEXT: # %bb.1: # %if.then
-; RV32IZHINX-NEXT: ret
-; RV32IZHINX-NEXT: .LBB0_2: # %if.else
; RV32IZHINX-NEXT: addi sp, sp, -16
; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINX-NEXT: call abort
;
; RV64IZHINX-LABEL: br_fcmp_false:
; RV64IZHINX: # %bb.0:
-; RV64IZHINX-NEXT: j .LBB0_2
-; RV64IZHINX-NEXT: # %bb.1: # %if.then
-; RV64IZHINX-NEXT: ret
-; RV64IZHINX-NEXT: .LBB0_2: # %if.else
; RV64IZHINX-NEXT: addi sp, sp, -16
; RV64IZHINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZHINX-NEXT: call abort
;
; RV32IZFHMIN-LABEL: br_fcmp_false:
; RV32IZFHMIN: # %bb.0:
-; RV32IZFHMIN-NEXT: j .LBB0_2
-; RV32IZFHMIN-NEXT: # %bb.1: # %if.then
-; RV32IZFHMIN-NEXT: ret
-; RV32IZFHMIN-NEXT: .LBB0_2: # %if.else
; RV32IZFHMIN-NEXT: addi sp, sp, -16
; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: call abort
;
; RV64IZFHMIN-LABEL: br_fcmp_false:
; RV64IZFHMIN: # %bb.0:
-; RV64IZFHMIN-NEXT: j .LBB0_2
-; RV64IZFHMIN-NEXT: # %bb.1: # %if.then
-; RV64IZFHMIN-NEXT: ret
-; RV64IZFHMIN-NEXT: .LBB0_2: # %if.else
; RV64IZFHMIN-NEXT: addi sp, sp, -16
; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFHMIN-NEXT: call abort
;
; RV32IZHINXMIN-LABEL: br_fcmp_false:
; RV32IZHINXMIN: # %bb.0:
-; RV32IZHINXMIN-NEXT: j .LBB0_2
-; RV32IZHINXMIN-NEXT: # %bb.1: # %if.then
-; RV32IZHINXMIN-NEXT: ret
-; RV32IZHINXMIN-NEXT: .LBB0_2: # %if.else
; RV32IZHINXMIN-NEXT: addi sp, sp, -16
; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINXMIN-NEXT: call abort
;
; RV64IZHINXMIN-LABEL: br_fcmp_false:
; RV64IZHINXMIN: # %bb.0:
-; RV64IZHINXMIN-NEXT: j .LBB0_2
-; RV64IZHINXMIN-NEXT: # %bb.1: # %if.then
-; RV64IZHINXMIN-NEXT: ret
-; RV64IZHINXMIN-NEXT: .LBB0_2: # %if.else
; RV64IZHINXMIN-NEXT: addi sp, sp, -16
; RV64IZHINXMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZHINXMIN-NEXT: call abort
@@ -1754,80 +1722,48 @@ if.then:
define void @br_fcmp_true(half %a, half %b) nounwind {
; RV32IZFH-LABEL: br_fcmp_true:
; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: j .LBB16_2
-; RV32IZFH-NEXT: # %bb.1: # %if.else
-; RV32IZFH-NEXT: ret
-; RV32IZFH-NEXT: .LBB16_2: # %if.then
; RV32IZFH-NEXT: addi sp, sp, -16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: call abort
;
; RV64IZFH-LABEL: br_fcmp_true:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: j .LBB16_2
-; RV64IZFH-NEXT: # %bb.1: # %if.else
-; RV64IZFH-NEXT: ret
-; RV64IZFH-NEXT: .LBB16_2: # %if.then
; RV64IZFH-NEXT: addi sp, sp, -16
; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFH-NEXT: call abort
;
; RV32IZHINX-LABEL: br_fcmp_true:
; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: j .LBB16_2
-; RV32IZHINX-NEXT: # %bb.1: # %if.else
-; RV32IZHINX-NEXT: ret
-; RV32IZHINX-NEXT: .LBB16_2: # %if.then
; RV32IZHINX-NEXT: addi sp, sp, -16
; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINX-NEXT: call abort
;
; RV64IZHINX-LABEL: br_fcmp_true:
; RV64IZHINX: # %bb.0:
-; RV64IZHINX-NEXT: j .LBB16_2
-; RV64IZHINX-NEXT: # %bb.1: # %if.else
-; RV64IZHINX-NEXT: ret
-; RV64IZHINX-NEXT: .LBB16_2: # %if.then
; RV64IZHINX-NEXT: addi sp, sp, -16
; RV64IZHINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZHINX-NEXT: call abort
;
; RV32IZFHMIN-LABEL: br_fcmp_true:
; RV32IZFHMIN: # %bb.0:
-; RV32IZFHMIN-NEXT: j .LBB16_2
-; RV32IZFHMIN-NEXT: # %bb.1: # %if.else
-; RV32IZFHMIN-NEXT: ret
-; RV32IZFHMIN-NEXT: .LBB16_2: # %if.then
; RV32IZFHMIN-NEXT: addi sp, sp, -16
; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: call abort
;
; RV64IZFHMIN-LABEL: br_fcmp_true:
; RV64IZFHMIN: # %bb.0:
-; RV64IZFHMIN-NEXT: j .LBB16_2
-; RV64IZFHMIN-NEXT: # %bb.1: # %if.else
-; RV64IZFHMIN-NEXT: ret
-; RV64IZFHMIN-NEXT: .LBB16_2: # %if.then
; RV64IZFHMIN-NEXT: addi sp, sp, -16
; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFHMIN-NEXT: call abort
;
; RV32IZHINXMIN-LABEL: br_fcmp_true:
; RV32IZHINXMIN: # %bb.0:
-; RV32IZHINXMIN-NEXT: j .LBB16_2
-; RV32IZHINXMIN-NEXT: # %bb.1: # %if.else
-; RV32IZHINXMIN-NEXT: ret
-; RV32IZHINXMIN-NEXT: .LBB16_2: # %if.then
; RV32IZHINXMIN-NEXT: addi sp, sp, -16
; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINXMIN-NEXT: call abort
;
; RV64IZHINXMIN-LABEL: br_fcmp_true:
; RV64IZHINXMIN: # %bb.0:
-; RV64IZHINXMIN-NEXT: j .LBB16_2
-; RV64IZHINXMIN-NEXT: # %bb.1: # %if.else
-; RV64IZHINXMIN-NEXT: ret
-; RV64IZHINXMIN-NEXT: .LBB16_2: # %if.then
; RV64IZHINXMIN-NEXT: addi sp, sp, -16
; RV64IZHINXMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZHINXMIN-NEXT: call abort
diff --git a/llvm/test/CodeGen/RISCV/half-maximum-minimum.ll b/llvm/test/CodeGen/RISCV/half-maximum-minimum.ll
index bc3f44363fb95..104fa22027042 100644
--- a/llvm/test/CodeGen/RISCV/half-maximum-minimum.ll
+++ b/llvm/test/CodeGen/RISCV/half-maximum-minimum.ll
@@ -20,16 +20,15 @@ define half @fminimum_f16(half %a, half %b) nounwind {
; CHECKIZFH-NEXT: feq.h a0, fa0, fa0
; CHECKIZFH-NEXT: fmv.h fa5, fa1
; CHECKIZFH-NEXT: beqz a0, .LBB0_3
-; CHECKIZFH-NEXT: # %bb.1:
+; CHECKIZFH-NEXT: .LBB0_1:
; CHECKIZFH-NEXT: feq.h a0, fa1, fa1
; CHECKIZFH-NEXT: beqz a0, .LBB0_4
-; CHECKIZFH-NEXT: .LBB0_2:
+; CHECKIZFH-NEXT: # %bb.2:
; CHECKIZFH-NEXT: fmin.h fa0, fa0, fa5
; CHECKIZFH-NEXT: ret
; CHECKIZFH-NEXT: .LBB0_3:
; CHECKIZFH-NEXT: fmv.h fa5, fa0
-; CHECKIZFH-NEXT: feq.h a0, fa1, fa1
-; CHECKIZFH-NEXT: bnez a0, .LBB0_2
+; CHECKIZFH-NEXT: j .LBB0_1
; CHECKIZFH-NEXT: .LBB0_4:
; CHECKIZFH-NEXT: fmin.h fa0, fa1, fa5
; CHECKIZFH-NEXT: ret
@@ -39,16 +38,15 @@ define half @fminimum_f16(half %a, half %b) nounwind {
; CHECKIZHINX-NEXT: feq.h a3, a0, a0
; CHECKIZHINX-NEXT: mv a2, a1
; CHECKIZHINX-NEXT: beqz a3, .LBB0_3
-; CHECKIZHINX-NEXT: # %bb.1:
+; CHECKIZHINX-NEXT: .LBB0_1:
; CHECKIZHINX-NEXT: feq.h a3, a1, a1
; CHECKIZHINX-NEXT: beqz a3, .LBB0_4
-; CHECKIZHINX-NEXT: .LBB0_2:
+; CHECKIZHINX-NEXT: # %bb.2:
; CHECKIZHINX-NEXT: fmin.h a0, a0, a2
; CHECKIZHINX-NEXT: ret
; CHECKIZHINX-NEXT: .LBB0_3:
; CHECKIZHINX-NEXT: mv a2, a0
-; CHECKIZHINX-NEXT: feq.h a3, a1, a1
-; CHECKIZHINX-NEXT: bnez a3, .LBB0_2
+; CHECKIZHINX-NEXT: j .LBB0_1
; CHECKIZHINX-NEXT: .LBB0_4:
; CHECKIZHINX-NEXT: fmin.h a0, a1, a2
; CHECKIZHINX-NEXT: ret
@@ -64,16 +62,15 @@ define half @fmaximum_f16(half %a, half %b) nounwind {
; CHECKIZFH-NEXT: feq.h a0, fa0, fa0
; CHECKIZFH-NEXT: fmv.h fa5, fa1
; CHECKIZFH-NEXT: beqz a0, .LBB1_3
-; CHECKIZFH-NEXT: # %bb.1:
+; CHECKIZFH-NEXT: .LBB1_1:
; CHECKIZFH-NEXT: feq.h a0, fa1, fa1
; CHECKIZFH-NEXT: beqz a0, .LBB1_4
-; CHECKIZFH-NEXT: .LBB1_2:
+; CHECKIZFH-NEXT: # %bb.2:
; CHECKIZFH-NEXT: fmax.h fa0, fa0, fa5
; CHECKIZFH-NEXT: ret
; CHECKIZFH-NEXT: .LBB1_3:
; CHECKIZFH-NEXT: fmv.h fa5, fa0
-; CHECKIZFH-NEXT: feq.h a0, fa1, fa1
-; CHECKIZFH-NEXT: bnez a0, .LBB1_2
+; CHECKIZFH-NEXT: j .LBB1_1
; CHECKIZFH-NEXT: .LBB1_4:
; CHECKIZFH-NEXT: fmax.h fa0, fa1, fa5
; CHECKIZFH-NEXT: ret
@@ -83,16 +80,15 @@ define half @fmaximum_f16(half %a, half %b) nounwind {
; CHECKIZHINX-NEXT: feq.h a3, a0, a0
; CHECKIZHINX-NEXT: mv a2, a1
; CHECKIZHINX-NEXT: beqz a3, .LBB1_3
-; CHECKIZHINX-NEXT: # %bb.1:
+; CHECKIZHINX-NEXT: .LBB1_1:
; CHECKIZHINX-NEXT: feq.h a3, a1, a1
; CHECKIZHINX-NEXT: beqz a3, .LBB1_4
-; CHECKIZHINX-NEXT: .LBB1_2:
+; CHECKIZHINX-NEXT: # %bb.2:
; CHECKIZHINX-NEXT: fmax.h a0, a0, a2
; CHECKIZHINX-NEXT: ret
; CHECKIZHINX-NEXT: .LBB1_3:
; CHECKIZHINX-NEXT: mv a2, a0
-; CHECKIZHINX-NEXT: feq.h a3, a1, a1
-; CHECKIZHINX-NEXT: bnez a3, .LBB1_2
+; CHECKIZHINX-NEXT: j .LBB1_1
; CHECKIZHINX-NEXT: .LBB1_4:
; CHECKIZHINX-NEXT: fmax.h a0, a1, a2
; CHECKIZHINX-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/machine-pipeliner.ll b/llvm/test/CodeGen/RISCV/machine-pipeliner.ll
index d250098576687..cc8a7270b289d 100644
--- a/llvm/test/CodeGen/RISCV/machine-pipeliner.ll
+++ b/llvm/test/CodeGen/RISCV/machine-pipeliner.ll
@@ -52,7 +52,7 @@ define void @test_pipelined_1(ptr noalias %in, ptr noalias %out, i32 signext %cn
;
; CHECK-PIPELINED-LABEL: test_pipelined_1:
; CHECK-PIPELINED: # %bb.0: # %entry
-; CHECK-PIPELINED-NEXT: blez a2, .LBB1_6
+; CHECK-PIPELINED-NEXT: blez a2, .LBB1_7
; CHECK-PIPELINED-NEXT: # %bb.1: # %for.body.preheader
; CHECK-PIPELINED-NEXT: lw a4, 0(a1)
; CHECK-PIPELINED-NEXT: addi a2, a2, -1
@@ -60,32 +60,32 @@ define void @test_pipelined_1(ptr noalias %in, ptr noalias %out, i32 signext %cn
; CHECK-PIPELINED-NEXT: addi a2, a0, 4
; CHECK-PIPELINED-NEXT: addi a1, a1, 4
; CHECK-PIPELINED-NEXT: addi a6, a6, 4
-; CHECK-PIPELINED-NEXT: beq a1, a6, .LBB1_5
+; CHECK-PIPELINED-NEXT: beq a1, a6, .LBB1_6
; CHECK-PIPELINED-NEXT: # %bb.2: # %for.body
; CHECK-PIPELINED-NEXT: lw a5, 0(a1)
; CHECK-PIPELINED-NEXT: addi a3, a2, 4
-; CHECK-PIPELINED-NEXT: addi a4, a4, 1
-; CHECK-PIPELINED-NEXT: addi a1, a1, 4
-; CHECK-PIPELINED-NEXT: beq a1, a6, .LBB1_4
+; CHECK-PIPELINED-NEXT: j .LBB1_4
; CHECK-PIPELINED-NEXT: .LBB1_3: # %for.body
-; CHECK-PIPELINED-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-PIPELINED-NEXT: # in Loop: Header=BB1_4 Depth=1
; CHECK-PIPELINED-NEXT: sw a4, 0(a0)
; CHECK-PIPELINED-NEXT: mv a4, a5
; CHECK-PIPELINED-NEXT: lw a5, 0(a1)
; CHECK-PIPELINED-NEXT: mv a0, a2
; CHECK-PIPELINED-NEXT: mv a2, a3
; CHECK-PIPELINED-NEXT: addi a3, a3, 4
+; CHECK-PIPELINED-NEXT: .LBB1_4: # %for.body
+; CHECK-PIPELINED-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-PIPELINED-NEXT: addi a4, a4, 1
; CHECK-PIPELINED-NEXT: addi a1, a1, 4
; CHECK-PIPELINED-NEXT: bne a1, a6, .LBB1_3
-; CHECK-PIPELINED-NEXT: .LBB1_4:
+; CHECK-PIPELINED-NEXT: # %bb.5:
; CHECK-PIPELINED-NEXT: sw a4, 0(a0)
; CHECK-PIPELINED-NEXT: mv a0, a2
; CHECK-PIPELINED-NEXT: mv a4, a5
-; CHECK-PIPELINED-NEXT: .LBB1_5:
+; CHECK-PIPELINED-NEXT: .LBB1_6:
; CHECK-PIPELINED-NEXT: addi a4, a4, 1
; CHECK-PIPELINED-NEXT: sw a4, 0(a0)
-; CHECK-PIPELINED-NEXT: .LBB1_6: # %for.end
+; CHECK-PIPELINED-NEXT: .LBB1_7: # %for.end
; CHECK-PIPELINED-NEXT: ret
entry:
%cmp = icmp sgt i32 %cnt, 0
diff --git a/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll b/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
index 6d3000a513538..82bdbda1a63f5 100644
--- a/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
+++ b/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
@@ -13,71 +13,11 @@ define i1 @sink_li(ptr %text, ptr %text.addr.0) nounwind {
; CHECK-NEXT: mv s0, a0
; CHECK-NEXT: call toupper
; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: beqz s0, .LBB0_25
-; CHECK-NEXT: .LBB0_1: # %while.body
+; CHECK-NEXT: beqz s0, .LBB0_2
+; CHECK-NEXT: .LBB0_1: # %while.body.6
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: j .LBB0_3
-; CHECK-NEXT: # %bb.2: # %while.body
-; CHECK-NEXT: j .LBB0_15
-; CHECK-NEXT: .LBB0_3: # %while.body.1
-; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: j .LBB0_5
-; CHECK-NEXT: # %bb.4: # %while.body.1
-; CHECK-NEXT: j .LBB0_16
-; CHECK-NEXT: .LBB0_5: # %while.body.3
-; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: j .LBB0_7
-; CHECK-NEXT: # %bb.6: # %while.body.3
-; CHECK-NEXT: j .LBB0_18
-; CHECK-NEXT: .LBB0_7: # %while.body.4
-; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: j .LBB0_9
-; CHECK-NEXT: # %bb.8: # %while.body.4
-; CHECK-NEXT: j .LBB0_20
-; CHECK-NEXT: .LBB0_9: # %while.body.5
-; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: j .LBB0_11
-; CHECK-NEXT: # %bb.10: # %while.body.5
-; CHECK-NEXT: j .LBB0_22
-; CHECK-NEXT: .LBB0_11: # %while.body.6
-; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: j .LBB0_1
-; CHECK-NEXT: # %bb.12: # %while.body.6
-; CHECK-NEXT: # %bb.13: # %while.body.6
-; CHECK-NEXT: # %bb.14: # %strdup.exit.split.loop.exit126
-; CHECK-NEXT: addi s0, s1, 7
-; CHECK-NEXT: j .LBB0_24
-; CHECK-NEXT: .LBB0_15: # %while.body
-; CHECK-NEXT: j .LBB0_17
-; CHECK-NEXT: .LBB0_16: # %while.body.1
-; CHECK-NEXT: .LBB0_17: # %strdup.exit.loopexit
-; CHECK-NEXT: li s0, 0
-; CHECK-NEXT: j .LBB0_24
-; CHECK-NEXT: .LBB0_18: # %while.body.3
-; CHECK-NEXT: # %bb.19: # %strdup.exit.split.loop.exit120
-; CHECK-NEXT: addi s0, s1, 4
-; CHECK-NEXT: j .LBB0_24
-; CHECK-NEXT: .LBB0_20: # %while.body.4
-; CHECK-NEXT: # %bb.21: # %strdup.exit.split.loop.exit122
-; CHECK-NEXT: addi s0, s1, 5
-; CHECK-NEXT: j .LBB0_24
-; CHECK-NEXT: .LBB0_22: # %while.body.5
-; CHECK-NEXT: j .LBB0_24
-; CHECK-NEXT: # %bb.23:
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: j .LBB0_25
-; CHECK-NEXT: .LBB0_24: # %strdup.exit
-; CHECK-NEXT: li s1, 0
-; CHECK-NEXT: mv s2, a0
-; CHECK-NEXT: li a0, 0
-; CHECK-NEXT: mv a1, s0
-; CHECK-NEXT: jalr s1
-; CHECK-NEXT: li a0, 0
-; CHECK-NEXT: mv a1, s2
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: jalr s1
-; CHECK-NEXT: li a1, 1
-; CHECK-NEXT: .LBB0_25: # %return
+; CHECK-NEXT: .LBB0_2: # %return
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/reduce-unnecessary-extension.ll b/llvm/test/CodeGen/RISCV/reduce-unnecessary-extension.ll
index 351408a7f085c..94f35a8cf660b 100644
--- a/llvm/test/CodeGen/RISCV/reduce-unnecessary-extension.ll
+++ b/llvm/test/CodeGen/RISCV/reduce-unnecessary-extension.ll
@@ -65,25 +65,25 @@ define signext i32 @test_loop() nounwind {
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: li s1, -16
; RV64I-NEXT: lui s2, %hi(PL_reg_match_utf8)
-; RV64I-NEXT: j .LBB1_2
-; RV64I-NEXT: .LBB1_1: # in Loop: Header=BB1_2 Depth=1
+; RV64I-NEXT: j .LBB1_3
+; RV64I-NEXT: .LBB1_1: # in Loop: Header=BB1_3 Depth=1
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call test2
+; RV64I-NEXT: .LBB1_2: # in Loop: Header=BB1_3 Depth=1
; RV64I-NEXT: addiw s1, s1, 1
-; RV64I-NEXT: beqz s1, .LBB1_4
-; RV64I-NEXT: .LBB1_2: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: beqz s1, .LBB1_5
+; RV64I-NEXT: .LBB1_3: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: lb s0, %lo(PL_reg_match_utf8)(s2)
; RV64I-NEXT: beqz s0, .LBB1_1
-; RV64I-NEXT: # %bb.3: # in Loop: Header=BB1_2 Depth=1
+; RV64I-NEXT: # %bb.4: # in Loop: Header=BB1_3 Depth=1
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call test1
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call test2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call test3
-; RV64I-NEXT: addiw s1, s1, 1
-; RV64I-NEXT: bnez s1, .LBB1_2
-; RV64I-NEXT: .LBB1_4:
+; RV64I-NEXT: j .LBB1_2
+; RV64I-NEXT: .LBB1_5:
; RV64I-NEXT: li a0, 0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
index 0508016736004..851b37f2887b4 100644
--- a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
@@ -36,25 +36,22 @@ define i32 @test(i32 %n) {
; CHECK-O3-LABEL: test:
; CHECK-O3: # %bb.0: # %entry
; CHECK-O3-NEXT: sext.w a1, a0
-; CHECK-O3-NEXT: blez a1, .LBB0_2
+; CHECK-O3-NEXT: blez a1, .LBB0_3
; CHECK-O3-NEXT: # %bb.1: # %if.then
; CHECK-O3-NEXT: lui a1, %hi(a)
; CHECK-O3-NEXT: lw a1, %lo(a)(a1)
; CHECK-O3-NEXT: mul a0, a1, a0
+; CHECK-O3-NEXT: .LBB0_2: # %if.then
; CHECK-O3-NEXT: lui a1, %hi(c)
; CHECK-O3-NEXT: lw a1, %lo(c)(a1)
; CHECK-O3-NEXT: addi a0, a0, -1
; CHECK-O3-NEXT: mulw a0, a0, a1
; CHECK-O3-NEXT: tail foo
-; CHECK-O3-NEXT: .LBB0_2: # %if.else
+; CHECK-O3-NEXT: .LBB0_3: # %if.else
; CHECK-O3-NEXT: lui a1, %hi(b)
; CHECK-O3-NEXT: lw a1, %lo(b)(a1)
; CHECK-O3-NEXT: divw a0, a1, a0
-; CHECK-O3-NEXT: lui a1, %hi(c)
-; CHECK-O3-NEXT: lw a1, %lo(c)(a1)
-; CHECK-O3-NEXT: addi a0, a0, -1
-; CHECK-O3-NEXT: mulw a0, a0, a1
-; CHECK-O3-NEXT: tail foo
+; CHECK-O3-NEXT: j .LBB0_2
entry:
%cmp = icmp sgt i32 %n, 0
br i1 %cmp, label %if.then, label %if.else
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 90a8eadb3f974..09332f44626e7 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -876,26 +876,26 @@ define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a3, 4(a1)
; RV32I-NEXT: lw a2, 12(a1)
-; RV32I-NEXT: beqz a3, .LBB22_3
+; RV32I-NEXT: beqz a3, .LBB22_4
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: sub a0, a3, a0
; RV32I-NEXT: xor a3, a3, a0
; RV32I-NEXT: sltu a0, a0, a3
-; RV32I-NEXT: lw a1, 8(a1)
-; RV32I-NEXT: bnez a2, .LBB22_4
; RV32I-NEXT: .LBB22_2:
+; RV32I-NEXT: lw a1, 8(a1)
+; RV32I-NEXT: bnez a2, .LBB22_5
+; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: addi a2, a1, -1
; RV32I-NEXT: xor a1, a1, a2
; RV32I-NEXT: sltu a1, a2, a1
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB22_3:
+; RV32I-NEXT: .LBB22_4:
; RV32I-NEXT: addi a3, a0, -1
; RV32I-NEXT: xor a0, a0, a3
; RV32I-NEXT: sltu a0, a3, a0
-; RV32I-NEXT: lw a1, 8(a1)
-; RV32I-NEXT: beqz a2, .LBB22_2
-; RV32I-NEXT: .LBB22_4:
+; RV32I-NEXT: j .LBB22_2
+; RV32I-NEXT: .LBB22_5:
; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: sub a1, a2, a1
; RV32I-NEXT: xor a2, a2, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir
index be73d4808937a..4e36fbfd23cfc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir
@@ -8,16 +8,15 @@
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: sf.vc.v.x 3, 31, v9, a1
; CHECK-NEXT: bgeu a0, zero, .LBB0_3
- ; CHECK-NEXT: # %bb.1: # %entry
+ ; CHECK-NEXT: .LBB0_1: # %entry
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: bltu a0, a2, .LBB0_4
- ; CHECK-NEXT: .LBB0_2: # %entry
+ ; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: vse64.v v9, (a1)
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_3:
; CHECK-NEXT: vmv.v.i v9, 0
- ; CHECK-NEXT: li a2, 128
- ; CHECK-NEXT: bgeu a0, a2, .LBB0_2
+ ; CHECK-NEXT: j .LBB0_1
; CHECK-NEXT: .LBB0_4: # %entry
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: vsll.vi v8, v8, 5
diff --git a/llvm/test/CodeGen/RISCV/rvv/expandload.ll b/llvm/test/CodeGen/RISCV/rvv/expandload.ll
index 9173fa4622487..54ecf7f7e4650 100644
--- a/llvm/test/CodeGen/RISCV/rvv/expandload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/expandload.ll
@@ -1641,7 +1641,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_28: # %else106
; CHECK-RV32-NEXT: slli a1, a3, 3
; CHECK-RV32-NEXT: bgez a1, .LBB61_30
-; CHECK-RV32-NEXT: .LBB61_29: # %cond.load109
+; CHECK-RV32-NEXT: # %bb.29: # %cond.load109
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 29, e8, m1, tu, ma
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -1801,7 +1801,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_64: # %else238
; CHECK-RV32-NEXT: slli a3, a2, 2
; CHECK-RV32-NEXT: bgez a3, .LBB61_66
-; CHECK-RV32-NEXT: .LBB61_65: # %cond.load241
+; CHECK-RV32-NEXT: # %bb.65: # %cond.load241
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -1953,7 +1953,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_98: # %else366
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_100
-; CHECK-RV32-NEXT: .LBB61_99: # %cond.load369
+; CHECK-RV32-NEXT: # %bb.99: # %cond.load369
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -2105,7 +2105,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_132: # %else494
; CHECK-RV32-NEXT: slli a3, a2, 2
; CHECK-RV32-NEXT: bgez a3, .LBB61_134
-; CHECK-RV32-NEXT: .LBB61_133: # %cond.load497
+; CHECK-RV32-NEXT: # %bb.133: # %cond.load497
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -2257,7 +2257,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_166: # %else622
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_168
-; CHECK-RV32-NEXT: .LBB61_167: # %cond.load625
+; CHECK-RV32-NEXT: # %bb.167: # %cond.load625
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -2409,7 +2409,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_200: # %else750
; CHECK-RV32-NEXT: slli a3, a2, 2
; CHECK-RV32-NEXT: bgez a3, .LBB61_202
-; CHECK-RV32-NEXT: .LBB61_201: # %cond.load753
+; CHECK-RV32-NEXT: # %bb.201: # %cond.load753
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -2561,7 +2561,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_234: # %else878
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_236
-; CHECK-RV32-NEXT: .LBB61_235: # %cond.load881
+; CHECK-RV32-NEXT: # %bb.235: # %cond.load881
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -2713,7 +2713,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_268: # %else1006
; CHECK-RV32-NEXT: slli a3, a2, 2
; CHECK-RV32-NEXT: bgez a3, .LBB61_270
-; CHECK-RV32-NEXT: .LBB61_269: # %cond.load1009
+; CHECK-RV32-NEXT: # %bb.269: # %cond.load1009
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -2865,7 +2865,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_302: # %else1134
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_304
-; CHECK-RV32-NEXT: .LBB61_303: # %cond.load1137
+; CHECK-RV32-NEXT: # %bb.303: # %cond.load1137
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
@@ -3012,7 +3012,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_336: # %else1262
; CHECK-RV32-NEXT: slli a3, a2, 2
; CHECK-RV32-NEXT: bgez a3, .LBB61_338
-; CHECK-RV32-NEXT: .LBB61_337: # %cond.load1265
+; CHECK-RV32-NEXT: # %bb.337: # %cond.load1265
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
@@ -3159,7 +3159,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_370: # %else1390
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_372
-; CHECK-RV32-NEXT: .LBB61_371: # %cond.load1393
+; CHECK-RV32-NEXT: # %bb.371: # %cond.load1393
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
@@ -3306,7 +3306,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_404: # %else1518
; CHECK-RV32-NEXT: slli a3, a2, 2
; CHECK-RV32-NEXT: bgez a3, .LBB61_406
-; CHECK-RV32-NEXT: .LBB61_405: # %cond.load1521
+; CHECK-RV32-NEXT: # %bb.405: # %cond.load1521
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
@@ -3453,7 +3453,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_438: # %else1646
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_440
-; CHECK-RV32-NEXT: .LBB61_439: # %cond.load1649
+; CHECK-RV32-NEXT: # %bb.439: # %cond.load1649
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
@@ -3600,7 +3600,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_472: # %else1774
; CHECK-RV32-NEXT: slli a3, a2, 2
; CHECK-RV32-NEXT: bgez a3, .LBB61_474
-; CHECK-RV32-NEXT: .LBB61_473: # %cond.load1777
+; CHECK-RV32-NEXT: # %bb.473: # %cond.load1777
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
@@ -3747,7 +3747,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_506: # %else1902
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_508
-; CHECK-RV32-NEXT: .LBB61_507: # %cond.load1905
+; CHECK-RV32-NEXT: # %bb.507: # %cond.load1905
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
@@ -3912,9 +3912,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a1, a3, 2
-; CHECK-RV32-NEXT: bnez a1, .LBB61_545
-; CHECK-RV32-NEXT: j .LBB61_2
+; CHECK-RV32-NEXT: j .LBB61_1
; CHECK-RV32-NEXT: .LBB61_545: # %cond.load1
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 2, e8, m1, tu, ma
@@ -3924,9 +3922,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a1, a3, 4
-; CHECK-RV32-NEXT: bnez a1, .LBB61_546
-; CHECK-RV32-NEXT: j .LBB61_3
+; CHECK-RV32-NEXT: j .LBB61_2
; CHECK-RV32-NEXT: .LBB61_546: # %cond.load5
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 3, e8, m1, tu, ma
@@ -3936,9 +3932,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a1, a3, 8
-; CHECK-RV32-NEXT: bnez a1, .LBB61_547
-; CHECK-RV32-NEXT: j .LBB61_4
+; CHECK-RV32-NEXT: j .LBB61_3
; CHECK-RV32-NEXT: .LBB61_547: # %cond.load9
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 4, e8, m1, tu, ma
@@ -3948,9 +3942,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a1, a3, 16
-; CHECK-RV32-NEXT: bnez a1, .LBB61_548
-; CHECK-RV32-NEXT: j .LBB61_5
+; CHECK-RV32-NEXT: j .LBB61_4
; CHECK-RV32-NEXT: .LBB61_548: # %cond.load13
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 5, e8, m1, tu, ma
@@ -3960,9 +3952,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a1, a3, 32
-; CHECK-RV32-NEXT: bnez a1, .LBB61_549
-; CHECK-RV32-NEXT: j .LBB61_6
+; CHECK-RV32-NEXT: j .LBB61_5
; CHECK-RV32-NEXT: .LBB61_549: # %cond.load17
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 6, e8, m1, tu, ma
@@ -3972,9 +3962,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a1, a3, 64
-; CHECK-RV32-NEXT: bnez a1, .LBB61_550
-; CHECK-RV32-NEXT: j .LBB61_7
+; CHECK-RV32-NEXT: j .LBB61_6
; CHECK-RV32-NEXT: .LBB61_550: # %cond.load21
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 7, e8, m1, tu, ma
@@ -3984,9 +3972,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a1, a3, 128
-; CHECK-RV32-NEXT: bnez a1, .LBB61_551
-; CHECK-RV32-NEXT: j .LBB61_8
+; CHECK-RV32-NEXT: j .LBB61_7
; CHECK-RV32-NEXT: .LBB61_551: # %cond.load25
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 8, e8, m1, tu, ma
@@ -3996,9 +3982,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a1, a3, 256
-; CHECK-RV32-NEXT: bnez a1, .LBB61_552
-; CHECK-RV32-NEXT: j .LBB61_9
+; CHECK-RV32-NEXT: j .LBB61_8
; CHECK-RV32-NEXT: .LBB61_552: # %cond.load29
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 9, e8, m1, tu, ma
@@ -4008,9 +3992,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a1, a3, 512
-; CHECK-RV32-NEXT: bnez a1, .LBB61_553
-; CHECK-RV32-NEXT: j .LBB61_10
+; CHECK-RV32-NEXT: j .LBB61_9
; CHECK-RV32-NEXT: .LBB61_553: # %cond.load33
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 10, e8, m1, tu, ma
@@ -4020,9 +4002,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a1, a3, 1024
-; CHECK-RV32-NEXT: bnez a1, .LBB61_554
-; CHECK-RV32-NEXT: j .LBB61_11
+; CHECK-RV32-NEXT: j .LBB61_10
; CHECK-RV32-NEXT: .LBB61_554: # %cond.load37
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 11, e8, m1, tu, ma
@@ -4032,9 +4012,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a1, a3, 20
-; CHECK-RV32-NEXT: bltz a1, .LBB61_555
-; CHECK-RV32-NEXT: j .LBB61_12
+; CHECK-RV32-NEXT: j .LBB61_11
; CHECK-RV32-NEXT: .LBB61_555: # %cond.load41
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 12, e8, m1, tu, ma
@@ -4044,9 +4022,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a1, a3, 19
-; CHECK-RV32-NEXT: bltz a1, .LBB61_556
-; CHECK-RV32-NEXT: j .LBB61_13
+; CHECK-RV32-NEXT: j .LBB61_12
; CHECK-RV32-NEXT: .LBB61_556: # %cond.load45
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 13, e8, m1, tu, ma
@@ -4056,9 +4032,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a1, a3, 18
-; CHECK-RV32-NEXT: bltz a1, .LBB61_557
-; CHECK-RV32-NEXT: j .LBB61_14
+; CHECK-RV32-NEXT: j .LBB61_13
; CHECK-RV32-NEXT: .LBB61_557: # %cond.load49
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 14, e8, m1, tu, ma
@@ -4068,9 +4042,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a1, a3, 17
-; CHECK-RV32-NEXT: bltz a1, .LBB61_558
-; CHECK-RV32-NEXT: j .LBB61_15
+; CHECK-RV32-NEXT: j .LBB61_14
; CHECK-RV32-NEXT: .LBB61_558: # %cond.load53
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 15, e8, m1, tu, ma
@@ -4080,9 +4052,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a1, a3, 16
-; CHECK-RV32-NEXT: bltz a1, .LBB61_559
-; CHECK-RV32-NEXT: j .LBB61_16
+; CHECK-RV32-NEXT: j .LBB61_15
; CHECK-RV32-NEXT: .LBB61_559: # %cond.load57
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 16, e8, m1, tu, ma
@@ -4092,9 +4062,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a1, a3, 15
-; CHECK-RV32-NEXT: bltz a1, .LBB61_560
-; CHECK-RV32-NEXT: j .LBB61_17
+; CHECK-RV32-NEXT: j .LBB61_16
; CHECK-RV32-NEXT: .LBB61_560: # %cond.load61
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 17, e8, m1, tu, ma
@@ -4104,9 +4072,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a1, a3, 14
-; CHECK-RV32-NEXT: bltz a1, .LBB61_561
-; CHECK-RV32-NEXT: j .LBB61_18
+; CHECK-RV32-NEXT: j .LBB61_17
; CHECK-RV32-NEXT: .LBB61_561: # %cond.load65
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 18, e8, m1, tu, ma
@@ -4116,9 +4082,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a1, a3, 13
-; CHECK-RV32-NEXT: bltz a1, .LBB61_562
-; CHECK-RV32-NEXT: j .LBB61_19
+; CHECK-RV32-NEXT: j .LBB61_18
; CHECK-RV32-NEXT: .LBB61_562: # %cond.load69
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 19, e8, m1, tu, ma
@@ -4128,9 +4092,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a1, a3, 12
-; CHECK-RV32-NEXT: bltz a1, .LBB61_563
-; CHECK-RV32-NEXT: j .LBB61_20
+; CHECK-RV32-NEXT: j .LBB61_19
; CHECK-RV32-NEXT: .LBB61_563: # %cond.load73
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 20, e8, m1, tu, ma
@@ -4140,9 +4102,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a1, a3, 11
-; CHECK-RV32-NEXT: bltz a1, .LBB61_564
-; CHECK-RV32-NEXT: j .LBB61_21
+; CHECK-RV32-NEXT: j .LBB61_20
; CHECK-RV32-NEXT: .LBB61_564: # %cond.load77
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 21, e8, m1, tu, ma
@@ -4152,9 +4112,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a1, a3, 10
-; CHECK-RV32-NEXT: bltz a1, .LBB61_565
-; CHECK-RV32-NEXT: j .LBB61_22
+; CHECK-RV32-NEXT: j .LBB61_21
; CHECK-RV32-NEXT: .LBB61_565: # %cond.load81
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 22, e8, m1, tu, ma
@@ -4164,9 +4122,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a1, a3, 9
-; CHECK-RV32-NEXT: bltz a1, .LBB61_566
-; CHECK-RV32-NEXT: j .LBB61_23
+; CHECK-RV32-NEXT: j .LBB61_22
; CHECK-RV32-NEXT: .LBB61_566: # %cond.load85
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 23, e8, m1, tu, ma
@@ -4176,9 +4132,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a1, a3, 8
-; CHECK-RV32-NEXT: bltz a1, .LBB61_567
-; CHECK-RV32-NEXT: j .LBB61_24
+; CHECK-RV32-NEXT: j .LBB61_23
; CHECK-RV32-NEXT: .LBB61_567: # %cond.load89
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 24, e8, m1, tu, ma
@@ -4188,9 +4142,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a1, a3, 7
-; CHECK-RV32-NEXT: bltz a1, .LBB61_568
-; CHECK-RV32-NEXT: j .LBB61_25
+; CHECK-RV32-NEXT: j .LBB61_24
; CHECK-RV32-NEXT: .LBB61_568: # %cond.load93
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 25, e8, m1, tu, ma
@@ -4200,9 +4152,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a1, a3, 6
-; CHECK-RV32-NEXT: bltz a1, .LBB61_569
-; CHECK-RV32-NEXT: j .LBB61_26
+; CHECK-RV32-NEXT: j .LBB61_25
; CHECK-RV32-NEXT: .LBB61_569: # %cond.load97
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 26, e8, m1, tu, ma
@@ -4212,9 +4162,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a1, a3, 5
-; CHECK-RV32-NEXT: bltz a1, .LBB61_570
-; CHECK-RV32-NEXT: j .LBB61_27
+; CHECK-RV32-NEXT: j .LBB61_26
; CHECK-RV32-NEXT: .LBB61_570: # %cond.load101
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 27, e8, m1, tu, ma
@@ -4224,9 +4172,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a1, a3, 4
-; CHECK-RV32-NEXT: bltz a1, .LBB61_571
-; CHECK-RV32-NEXT: j .LBB61_28
+; CHECK-RV32-NEXT: j .LBB61_27
; CHECK-RV32-NEXT: .LBB61_571: # %cond.load105
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 28, e8, m1, tu, ma
@@ -4236,11 +4182,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a1, a3, 3
-; CHECK-RV32-NEXT: bgez a1, .LBB61_1025
-; CHECK-RV32-NEXT: j .LBB61_29
-; CHECK-RV32-NEXT: .LBB61_1025: # %cond.load105
-; CHECK-RV32-NEXT: j .LBB61_30
+; CHECK-RV32-NEXT: j .LBB61_28
; CHECK-RV32-NEXT: .LBB61_572: # %cond.load121
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -4251,9 +4193,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 1
-; CHECK-RV32-NEXT: bnez a3, .LBB61_573
-; CHECK-RV32-NEXT: j .LBB61_36
+; CHECK-RV32-NEXT: j .LBB61_35
; CHECK-RV32-NEXT: .LBB61_573: # %cond.load125
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4266,9 +4206,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 2
-; CHECK-RV32-NEXT: bnez a3, .LBB61_574
-; CHECK-RV32-NEXT: j .LBB61_37
+; CHECK-RV32-NEXT: j .LBB61_36
; CHECK-RV32-NEXT: .LBB61_574: # %cond.load129
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4281,9 +4219,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 4
-; CHECK-RV32-NEXT: bnez a3, .LBB61_575
-; CHECK-RV32-NEXT: j .LBB61_38
+; CHECK-RV32-NEXT: j .LBB61_37
; CHECK-RV32-NEXT: .LBB61_575: # %cond.load133
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4296,9 +4232,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 8
-; CHECK-RV32-NEXT: bnez a3, .LBB61_576
-; CHECK-RV32-NEXT: j .LBB61_39
+; CHECK-RV32-NEXT: j .LBB61_38
; CHECK-RV32-NEXT: .LBB61_576: # %cond.load137
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4311,9 +4245,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 16
-; CHECK-RV32-NEXT: bnez a3, .LBB61_577
-; CHECK-RV32-NEXT: j .LBB61_40
+; CHECK-RV32-NEXT: j .LBB61_39
; CHECK-RV32-NEXT: .LBB61_577: # %cond.load141
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4326,9 +4258,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 32
-; CHECK-RV32-NEXT: bnez a3, .LBB61_578
-; CHECK-RV32-NEXT: j .LBB61_41
+; CHECK-RV32-NEXT: j .LBB61_40
; CHECK-RV32-NEXT: .LBB61_578: # %cond.load145
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4341,9 +4271,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 64
-; CHECK-RV32-NEXT: bnez a3, .LBB61_579
-; CHECK-RV32-NEXT: j .LBB61_42
+; CHECK-RV32-NEXT: j .LBB61_41
; CHECK-RV32-NEXT: .LBB61_579: # %cond.load149
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4356,9 +4284,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 128
-; CHECK-RV32-NEXT: bnez a3, .LBB61_580
-; CHECK-RV32-NEXT: j .LBB61_43
+; CHECK-RV32-NEXT: j .LBB61_42
; CHECK-RV32-NEXT: .LBB61_580: # %cond.load153
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4371,9 +4297,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 256
-; CHECK-RV32-NEXT: bnez a3, .LBB61_581
-; CHECK-RV32-NEXT: j .LBB61_44
+; CHECK-RV32-NEXT: j .LBB61_43
; CHECK-RV32-NEXT: .LBB61_581: # %cond.load157
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4386,9 +4310,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 512
-; CHECK-RV32-NEXT: bnez a3, .LBB61_582
-; CHECK-RV32-NEXT: j .LBB61_45
+; CHECK-RV32-NEXT: j .LBB61_44
; CHECK-RV32-NEXT: .LBB61_582: # %cond.load161
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4401,9 +4323,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 1024
-; CHECK-RV32-NEXT: bnez a3, .LBB61_583
-; CHECK-RV32-NEXT: j .LBB61_46
+; CHECK-RV32-NEXT: j .LBB61_45
; CHECK-RV32-NEXT: .LBB61_583: # %cond.load165
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4416,9 +4336,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 20
-; CHECK-RV32-NEXT: bltz a3, .LBB61_584
-; CHECK-RV32-NEXT: j .LBB61_47
+; CHECK-RV32-NEXT: j .LBB61_46
; CHECK-RV32-NEXT: .LBB61_584: # %cond.load169
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4431,9 +4349,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 19
-; CHECK-RV32-NEXT: bltz a3, .LBB61_585
-; CHECK-RV32-NEXT: j .LBB61_48
+; CHECK-RV32-NEXT: j .LBB61_47
; CHECK-RV32-NEXT: .LBB61_585: # %cond.load173
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4446,9 +4362,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 18
-; CHECK-RV32-NEXT: bltz a3, .LBB61_586
-; CHECK-RV32-NEXT: j .LBB61_49
+; CHECK-RV32-NEXT: j .LBB61_48
; CHECK-RV32-NEXT: .LBB61_586: # %cond.load177
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4461,9 +4375,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 17
-; CHECK-RV32-NEXT: bltz a3, .LBB61_587
-; CHECK-RV32-NEXT: j .LBB61_50
+; CHECK-RV32-NEXT: j .LBB61_49
; CHECK-RV32-NEXT: .LBB61_587: # %cond.load181
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4476,9 +4388,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 16
-; CHECK-RV32-NEXT: bltz a3, .LBB61_588
-; CHECK-RV32-NEXT: j .LBB61_51
+; CHECK-RV32-NEXT: j .LBB61_50
; CHECK-RV32-NEXT: .LBB61_588: # %cond.load185
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4491,9 +4401,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 15
-; CHECK-RV32-NEXT: bltz a3, .LBB61_589
-; CHECK-RV32-NEXT: j .LBB61_52
+; CHECK-RV32-NEXT: j .LBB61_51
; CHECK-RV32-NEXT: .LBB61_589: # %cond.load189
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4506,9 +4414,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 14
-; CHECK-RV32-NEXT: bltz a3, .LBB61_590
-; CHECK-RV32-NEXT: j .LBB61_53
+; CHECK-RV32-NEXT: j .LBB61_52
; CHECK-RV32-NEXT: .LBB61_590: # %cond.load193
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4521,9 +4427,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 13
-; CHECK-RV32-NEXT: bltz a3, .LBB61_591
-; CHECK-RV32-NEXT: j .LBB61_54
+; CHECK-RV32-NEXT: j .LBB61_53
; CHECK-RV32-NEXT: .LBB61_591: # %cond.load197
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4536,9 +4440,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 12
-; CHECK-RV32-NEXT: bltz a3, .LBB61_592
-; CHECK-RV32-NEXT: j .LBB61_55
+; CHECK-RV32-NEXT: j .LBB61_54
; CHECK-RV32-NEXT: .LBB61_592: # %cond.load201
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4551,9 +4453,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 11
-; CHECK-RV32-NEXT: bltz a3, .LBB61_593
-; CHECK-RV32-NEXT: j .LBB61_56
+; CHECK-RV32-NEXT: j .LBB61_55
; CHECK-RV32-NEXT: .LBB61_593: # %cond.load205
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4566,9 +4466,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 10
-; CHECK-RV32-NEXT: bltz a3, .LBB61_594
-; CHECK-RV32-NEXT: j .LBB61_57
+; CHECK-RV32-NEXT: j .LBB61_56
; CHECK-RV32-NEXT: .LBB61_594: # %cond.load209
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4581,9 +4479,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 9
-; CHECK-RV32-NEXT: bltz a3, .LBB61_595
-; CHECK-RV32-NEXT: j .LBB61_58
+; CHECK-RV32-NEXT: j .LBB61_57
; CHECK-RV32-NEXT: .LBB61_595: # %cond.load213
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4596,9 +4492,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 8
-; CHECK-RV32-NEXT: bltz a3, .LBB61_596
-; CHECK-RV32-NEXT: j .LBB61_59
+; CHECK-RV32-NEXT: j .LBB61_58
; CHECK-RV32-NEXT: .LBB61_596: # %cond.load217
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4611,9 +4505,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 7
-; CHECK-RV32-NEXT: bltz a3, .LBB61_597
-; CHECK-RV32-NEXT: j .LBB61_60
+; CHECK-RV32-NEXT: j .LBB61_59
; CHECK-RV32-NEXT: .LBB61_597: # %cond.load221
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4626,9 +4518,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 6
-; CHECK-RV32-NEXT: bltz a3, .LBB61_598
-; CHECK-RV32-NEXT: j .LBB61_61
+; CHECK-RV32-NEXT: j .LBB61_60
; CHECK-RV32-NEXT: .LBB61_598: # %cond.load225
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4641,9 +4531,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 5
-; CHECK-RV32-NEXT: bltz a3, .LBB61_599
-; CHECK-RV32-NEXT: j .LBB61_62
+; CHECK-RV32-NEXT: j .LBB61_61
; CHECK-RV32-NEXT: .LBB61_599: # %cond.load229
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4656,9 +4544,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 4
-; CHECK-RV32-NEXT: bltz a3, .LBB61_600
-; CHECK-RV32-NEXT: j .LBB61_63
+; CHECK-RV32-NEXT: j .LBB61_62
; CHECK-RV32-NEXT: .LBB61_600: # %cond.load233
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4671,9 +4557,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 3
-; CHECK-RV32-NEXT: bltz a3, .LBB61_601
-; CHECK-RV32-NEXT: j .LBB61_64
+; CHECK-RV32-NEXT: j .LBB61_63
; CHECK-RV32-NEXT: .LBB61_601: # %cond.load237
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4686,11 +4570,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 2
-; CHECK-RV32-NEXT: bgez a3, .LBB61_1026
-; CHECK-RV32-NEXT: j .LBB61_65
-; CHECK-RV32-NEXT: .LBB61_1026: # %cond.load237
-; CHECK-RV32-NEXT: j .LBB61_66
+; CHECK-RV32-NEXT: j .LBB61_64
; CHECK-RV32-NEXT: .LBB61_602: # %cond.load249
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -4702,9 +4582,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 1
-; CHECK-RV32-NEXT: bnez a2, .LBB61_603
-; CHECK-RV32-NEXT: j .LBB61_70
+; CHECK-RV32-NEXT: j .LBB61_69
; CHECK-RV32-NEXT: .LBB61_603: # %cond.load253
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4717,9 +4595,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 2
-; CHECK-RV32-NEXT: bnez a2, .LBB61_604
-; CHECK-RV32-NEXT: j .LBB61_71
+; CHECK-RV32-NEXT: j .LBB61_70
; CHECK-RV32-NEXT: .LBB61_604: # %cond.load257
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4732,9 +4608,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 4
-; CHECK-RV32-NEXT: bnez a2, .LBB61_605
-; CHECK-RV32-NEXT: j .LBB61_72
+; CHECK-RV32-NEXT: j .LBB61_71
; CHECK-RV32-NEXT: .LBB61_605: # %cond.load261
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4747,9 +4621,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 8
-; CHECK-RV32-NEXT: bnez a2, .LBB61_606
-; CHECK-RV32-NEXT: j .LBB61_73
+; CHECK-RV32-NEXT: j .LBB61_72
; CHECK-RV32-NEXT: .LBB61_606: # %cond.load265
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4762,9 +4634,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 16
-; CHECK-RV32-NEXT: bnez a2, .LBB61_607
-; CHECK-RV32-NEXT: j .LBB61_74
+; CHECK-RV32-NEXT: j .LBB61_73
; CHECK-RV32-NEXT: .LBB61_607: # %cond.load269
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4777,9 +4647,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 32
-; CHECK-RV32-NEXT: bnez a2, .LBB61_608
-; CHECK-RV32-NEXT: j .LBB61_75
+; CHECK-RV32-NEXT: j .LBB61_74
; CHECK-RV32-NEXT: .LBB61_608: # %cond.load273
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4792,9 +4660,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 64
-; CHECK-RV32-NEXT: bnez a2, .LBB61_609
-; CHECK-RV32-NEXT: j .LBB61_76
+; CHECK-RV32-NEXT: j .LBB61_75
; CHECK-RV32-NEXT: .LBB61_609: # %cond.load277
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4807,9 +4673,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 128
-; CHECK-RV32-NEXT: bnez a2, .LBB61_610
-; CHECK-RV32-NEXT: j .LBB61_77
+; CHECK-RV32-NEXT: j .LBB61_76
; CHECK-RV32-NEXT: .LBB61_610: # %cond.load281
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4822,9 +4686,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 256
-; CHECK-RV32-NEXT: bnez a2, .LBB61_611
-; CHECK-RV32-NEXT: j .LBB61_78
+; CHECK-RV32-NEXT: j .LBB61_77
; CHECK-RV32-NEXT: .LBB61_611: # %cond.load285
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4837,9 +4699,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 512
-; CHECK-RV32-NEXT: bnez a2, .LBB61_612
-; CHECK-RV32-NEXT: j .LBB61_79
+; CHECK-RV32-NEXT: j .LBB61_78
; CHECK-RV32-NEXT: .LBB61_612: # %cond.load289
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4852,9 +4712,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 1024
-; CHECK-RV32-NEXT: bnez a2, .LBB61_613
-; CHECK-RV32-NEXT: j .LBB61_80
+; CHECK-RV32-NEXT: j .LBB61_79
; CHECK-RV32-NEXT: .LBB61_613: # %cond.load293
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4867,9 +4725,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 20
-; CHECK-RV32-NEXT: bltz a2, .LBB61_614
-; CHECK-RV32-NEXT: j .LBB61_81
+; CHECK-RV32-NEXT: j .LBB61_80
; CHECK-RV32-NEXT: .LBB61_614: # %cond.load297
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4882,9 +4738,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 19
-; CHECK-RV32-NEXT: bltz a2, .LBB61_615
-; CHECK-RV32-NEXT: j .LBB61_82
+; CHECK-RV32-NEXT: j .LBB61_81
; CHECK-RV32-NEXT: .LBB61_615: # %cond.load301
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4897,9 +4751,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 18
-; CHECK-RV32-NEXT: bltz a2, .LBB61_616
-; CHECK-RV32-NEXT: j .LBB61_83
+; CHECK-RV32-NEXT: j .LBB61_82
; CHECK-RV32-NEXT: .LBB61_616: # %cond.load305
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4912,9 +4764,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 17
-; CHECK-RV32-NEXT: bltz a2, .LBB61_617
-; CHECK-RV32-NEXT: j .LBB61_84
+; CHECK-RV32-NEXT: j .LBB61_83
; CHECK-RV32-NEXT: .LBB61_617: # %cond.load309
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4927,9 +4777,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 16
-; CHECK-RV32-NEXT: bltz a2, .LBB61_618
-; CHECK-RV32-NEXT: j .LBB61_85
+; CHECK-RV32-NEXT: j .LBB61_84
; CHECK-RV32-NEXT: .LBB61_618: # %cond.load313
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4942,9 +4790,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 15
-; CHECK-RV32-NEXT: bltz a2, .LBB61_619
-; CHECK-RV32-NEXT: j .LBB61_86
+; CHECK-RV32-NEXT: j .LBB61_85
; CHECK-RV32-NEXT: .LBB61_619: # %cond.load317
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4957,9 +4803,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 14
-; CHECK-RV32-NEXT: bltz a2, .LBB61_620
-; CHECK-RV32-NEXT: j .LBB61_87
+; CHECK-RV32-NEXT: j .LBB61_86
; CHECK-RV32-NEXT: .LBB61_620: # %cond.load321
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4972,9 +4816,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 13
-; CHECK-RV32-NEXT: bltz a2, .LBB61_621
-; CHECK-RV32-NEXT: j .LBB61_88
+; CHECK-RV32-NEXT: j .LBB61_87
; CHECK-RV32-NEXT: .LBB61_621: # %cond.load325
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4987,9 +4829,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 12
-; CHECK-RV32-NEXT: bltz a2, .LBB61_622
-; CHECK-RV32-NEXT: j .LBB61_89
+; CHECK-RV32-NEXT: j .LBB61_88
; CHECK-RV32-NEXT: .LBB61_622: # %cond.load329
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5002,9 +4842,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 11
-; CHECK-RV32-NEXT: bltz a2, .LBB61_623
-; CHECK-RV32-NEXT: j .LBB61_90
+; CHECK-RV32-NEXT: j .LBB61_89
; CHECK-RV32-NEXT: .LBB61_623: # %cond.load333
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5017,9 +4855,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 10
-; CHECK-RV32-NEXT: bltz a2, .LBB61_624
-; CHECK-RV32-NEXT: j .LBB61_91
+; CHECK-RV32-NEXT: j .LBB61_90
; CHECK-RV32-NEXT: .LBB61_624: # %cond.load337
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5032,9 +4868,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 9
-; CHECK-RV32-NEXT: bltz a2, .LBB61_625
-; CHECK-RV32-NEXT: j .LBB61_92
+; CHECK-RV32-NEXT: j .LBB61_91
; CHECK-RV32-NEXT: .LBB61_625: # %cond.load341
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5047,9 +4881,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 8
-; CHECK-RV32-NEXT: bltz a2, .LBB61_626
-; CHECK-RV32-NEXT: j .LBB61_93
+; CHECK-RV32-NEXT: j .LBB61_92
; CHECK-RV32-NEXT: .LBB61_626: # %cond.load345
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5062,9 +4894,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 7
-; CHECK-RV32-NEXT: bltz a2, .LBB61_627
-; CHECK-RV32-NEXT: j .LBB61_94
+; CHECK-RV32-NEXT: j .LBB61_93
; CHECK-RV32-NEXT: .LBB61_627: # %cond.load349
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5077,9 +4907,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 6
-; CHECK-RV32-NEXT: bltz a2, .LBB61_628
-; CHECK-RV32-NEXT: j .LBB61_95
+; CHECK-RV32-NEXT: j .LBB61_94
; CHECK-RV32-NEXT: .LBB61_628: # %cond.load353
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5092,9 +4920,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 5
-; CHECK-RV32-NEXT: bltz a2, .LBB61_629
-; CHECK-RV32-NEXT: j .LBB61_96
+; CHECK-RV32-NEXT: j .LBB61_95
; CHECK-RV32-NEXT: .LBB61_629: # %cond.load357
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5107,9 +4933,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 4
-; CHECK-RV32-NEXT: bltz a2, .LBB61_630
-; CHECK-RV32-NEXT: j .LBB61_97
+; CHECK-RV32-NEXT: j .LBB61_96
; CHECK-RV32-NEXT: .LBB61_630: # %cond.load361
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5122,9 +4946,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 3
-; CHECK-RV32-NEXT: bltz a2, .LBB61_631
-; CHECK-RV32-NEXT: j .LBB61_98
+; CHECK-RV32-NEXT: j .LBB61_97
; CHECK-RV32-NEXT: .LBB61_631: # %cond.load365
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5137,11 +4959,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 2
-; CHECK-RV32-NEXT: bgez a2, .LBB61_1027
-; CHECK-RV32-NEXT: j .LBB61_99
-; CHECK-RV32-NEXT: .LBB61_1027: # %cond.load365
-; CHECK-RV32-NEXT: j .LBB61_100
+; CHECK-RV32-NEXT: j .LBB61_98
; CHECK-RV32-NEXT: .LBB61_632: # %cond.load377
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -5153,9 +4971,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 1
-; CHECK-RV32-NEXT: bnez a3, .LBB61_633
-; CHECK-RV32-NEXT: j .LBB61_104
+; CHECK-RV32-NEXT: j .LBB61_103
; CHECK-RV32-NEXT: .LBB61_633: # %cond.load381
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5168,9 +4984,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 2
-; CHECK-RV32-NEXT: bnez a3, .LBB61_634
-; CHECK-RV32-NEXT: j .LBB61_105
+; CHECK-RV32-NEXT: j .LBB61_104
; CHECK-RV32-NEXT: .LBB61_634: # %cond.load385
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5183,9 +4997,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 4
-; CHECK-RV32-NEXT: bnez a3, .LBB61_635
-; CHECK-RV32-NEXT: j .LBB61_106
+; CHECK-RV32-NEXT: j .LBB61_105
; CHECK-RV32-NEXT: .LBB61_635: # %cond.load389
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5198,9 +5010,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 8
-; CHECK-RV32-NEXT: bnez a3, .LBB61_636
-; CHECK-RV32-NEXT: j .LBB61_107
+; CHECK-RV32-NEXT: j .LBB61_106
; CHECK-RV32-NEXT: .LBB61_636: # %cond.load393
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5213,9 +5023,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 16
-; CHECK-RV32-NEXT: bnez a3, .LBB61_637
-; CHECK-RV32-NEXT: j .LBB61_108
+; CHECK-RV32-NEXT: j .LBB61_107
; CHECK-RV32-NEXT: .LBB61_637: # %cond.load397
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5228,9 +5036,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 32
-; CHECK-RV32-NEXT: bnez a3, .LBB61_638
-; CHECK-RV32-NEXT: j .LBB61_109
+; CHECK-RV32-NEXT: j .LBB61_108
; CHECK-RV32-NEXT: .LBB61_638: # %cond.load401
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5243,9 +5049,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 64
-; CHECK-RV32-NEXT: bnez a3, .LBB61_639
-; CHECK-RV32-NEXT: j .LBB61_110
+; CHECK-RV32-NEXT: j .LBB61_109
; CHECK-RV32-NEXT: .LBB61_639: # %cond.load405
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5258,9 +5062,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 128
-; CHECK-RV32-NEXT: bnez a3, .LBB61_640
-; CHECK-RV32-NEXT: j .LBB61_111
+; CHECK-RV32-NEXT: j .LBB61_110
; CHECK-RV32-NEXT: .LBB61_640: # %cond.load409
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5273,9 +5075,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 256
-; CHECK-RV32-NEXT: bnez a3, .LBB61_641
-; CHECK-RV32-NEXT: j .LBB61_112
+; CHECK-RV32-NEXT: j .LBB61_111
; CHECK-RV32-NEXT: .LBB61_641: # %cond.load413
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5288,9 +5088,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 512
-; CHECK-RV32-NEXT: bnez a3, .LBB61_642
-; CHECK-RV32-NEXT: j .LBB61_113
+; CHECK-RV32-NEXT: j .LBB61_112
; CHECK-RV32-NEXT: .LBB61_642: # %cond.load417
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5303,9 +5101,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 1024
-; CHECK-RV32-NEXT: bnez a3, .LBB61_643
-; CHECK-RV32-NEXT: j .LBB61_114
+; CHECK-RV32-NEXT: j .LBB61_113
; CHECK-RV32-NEXT: .LBB61_643: # %cond.load421
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5318,9 +5114,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 20
-; CHECK-RV32-NEXT: bltz a3, .LBB61_644
-; CHECK-RV32-NEXT: j .LBB61_115
+; CHECK-RV32-NEXT: j .LBB61_114
; CHECK-RV32-NEXT: .LBB61_644: # %cond.load425
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5333,9 +5127,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 19
-; CHECK-RV32-NEXT: bltz a3, .LBB61_645
-; CHECK-RV32-NEXT: j .LBB61_116
+; CHECK-RV32-NEXT: j .LBB61_115
; CHECK-RV32-NEXT: .LBB61_645: # %cond.load429
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5348,9 +5140,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 18
-; CHECK-RV32-NEXT: bltz a3, .LBB61_646
-; CHECK-RV32-NEXT: j .LBB61_117
+; CHECK-RV32-NEXT: j .LBB61_116
; CHECK-RV32-NEXT: .LBB61_646: # %cond.load433
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5363,9 +5153,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 17
-; CHECK-RV32-NEXT: bltz a3, .LBB61_647
-; CHECK-RV32-NEXT: j .LBB61_118
+; CHECK-RV32-NEXT: j .LBB61_117
; CHECK-RV32-NEXT: .LBB61_647: # %cond.load437
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5378,9 +5166,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 16
-; CHECK-RV32-NEXT: bltz a3, .LBB61_648
-; CHECK-RV32-NEXT: j .LBB61_119
+; CHECK-RV32-NEXT: j .LBB61_118
; CHECK-RV32-NEXT: .LBB61_648: # %cond.load441
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5393,9 +5179,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 15
-; CHECK-RV32-NEXT: bltz a3, .LBB61_649
-; CHECK-RV32-NEXT: j .LBB61_120
+; CHECK-RV32-NEXT: j .LBB61_119
; CHECK-RV32-NEXT: .LBB61_649: # %cond.load445
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5408,9 +5192,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 14
-; CHECK-RV32-NEXT: bltz a3, .LBB61_650
-; CHECK-RV32-NEXT: j .LBB61_121
+; CHECK-RV32-NEXT: j .LBB61_120
; CHECK-RV32-NEXT: .LBB61_650: # %cond.load449
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5423,9 +5205,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 13
-; CHECK-RV32-NEXT: bltz a3, .LBB61_651
-; CHECK-RV32-NEXT: j .LBB61_122
+; CHECK-RV32-NEXT: j .LBB61_121
; CHECK-RV32-NEXT: .LBB61_651: # %cond.load453
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5438,9 +5218,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 12
-; CHECK-RV32-NEXT: bltz a3, .LBB61_652
-; CHECK-RV32-NEXT: j .LBB61_123
+; CHECK-RV32-NEXT: j .LBB61_122
; CHECK-RV32-NEXT: .LBB61_652: # %cond.load457
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5453,9 +5231,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 11
-; CHECK-RV32-NEXT: bltz a3, .LBB61_653
-; CHECK-RV32-NEXT: j .LBB61_124
+; CHECK-RV32-NEXT: j .LBB61_123
; CHECK-RV32-NEXT: .LBB61_653: # %cond.load461
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5468,9 +5244,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 10
-; CHECK-RV32-NEXT: bltz a3, .LBB61_654
-; CHECK-RV32-NEXT: j .LBB61_125
+; CHECK-RV32-NEXT: j .LBB61_124
; CHECK-RV32-NEXT: .LBB61_654: # %cond.load465
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5483,9 +5257,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 9
-; CHECK-RV32-NEXT: bltz a3, .LBB61_655
-; CHECK-RV32-NEXT: j .LBB61_126
+; CHECK-RV32-NEXT: j .LBB61_125
; CHECK-RV32-NEXT: .LBB61_655: # %cond.load469
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5498,9 +5270,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 8
-; CHECK-RV32-NEXT: bltz a3, .LBB61_656
-; CHECK-RV32-NEXT: j .LBB61_127
+; CHECK-RV32-NEXT: j .LBB61_126
; CHECK-RV32-NEXT: .LBB61_656: # %cond.load473
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5513,9 +5283,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 7
-; CHECK-RV32-NEXT: bltz a3, .LBB61_657
-; CHECK-RV32-NEXT: j .LBB61_128
+; CHECK-RV32-NEXT: j .LBB61_127
; CHECK-RV32-NEXT: .LBB61_657: # %cond.load477
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5528,9 +5296,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 6
-; CHECK-RV32-NEXT: bltz a3, .LBB61_658
-; CHECK-RV32-NEXT: j .LBB61_129
+; CHECK-RV32-NEXT: j .LBB61_128
; CHECK-RV32-NEXT: .LBB61_658: # %cond.load481
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5543,9 +5309,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 5
-; CHECK-RV32-NEXT: bltz a3, .LBB61_659
-; CHECK-RV32-NEXT: j .LBB61_130
+; CHECK-RV32-NEXT: j .LBB61_129
; CHECK-RV32-NEXT: .LBB61_659: # %cond.load485
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5558,9 +5322,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 4
-; CHECK-RV32-NEXT: bltz a3, .LBB61_660
-; CHECK-RV32-NEXT: j .LBB61_131
+; CHECK-RV32-NEXT: j .LBB61_130
; CHECK-RV32-NEXT: .LBB61_660: # %cond.load489
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5573,9 +5335,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 3
-; CHECK-RV32-NEXT: bltz a3, .LBB61_661
-; CHECK-RV32-NEXT: j .LBB61_132
+; CHECK-RV32-NEXT: j .LBB61_131
; CHECK-RV32-NEXT: .LBB61_661: # %cond.load493
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5588,11 +5348,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 2
-; CHECK-RV32-NEXT: bgez a3, .LBB61_1028
-; CHECK-RV32-NEXT: j .LBB61_133
-; CHECK-RV32-NEXT: .LBB61_1028: # %cond.load493
-; CHECK-RV32-NEXT: j .LBB61_134
+; CHECK-RV32-NEXT: j .LBB61_132
; CHECK-RV32-NEXT: .LBB61_662: # %cond.load505
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -5604,9 +5360,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 1
-; CHECK-RV32-NEXT: bnez a2, .LBB61_663
-; CHECK-RV32-NEXT: j .LBB61_138
+; CHECK-RV32-NEXT: j .LBB61_137
; CHECK-RV32-NEXT: .LBB61_663: # %cond.load509
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5619,9 +5373,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 2
-; CHECK-RV32-NEXT: bnez a2, .LBB61_664
-; CHECK-RV32-NEXT: j .LBB61_139
+; CHECK-RV32-NEXT: j .LBB61_138
; CHECK-RV32-NEXT: .LBB61_664: # %cond.load513
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5634,9 +5386,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 4
-; CHECK-RV32-NEXT: bnez a2, .LBB61_665
-; CHECK-RV32-NEXT: j .LBB61_140
+; CHECK-RV32-NEXT: j .LBB61_139
; CHECK-RV32-NEXT: .LBB61_665: # %cond.load517
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5649,9 +5399,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 8
-; CHECK-RV32-NEXT: bnez a2, .LBB61_666
-; CHECK-RV32-NEXT: j .LBB61_141
+; CHECK-RV32-NEXT: j .LBB61_140
; CHECK-RV32-NEXT: .LBB61_666: # %cond.load521
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5664,9 +5412,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 16
-; CHECK-RV32-NEXT: bnez a2, .LBB61_667
-; CHECK-RV32-NEXT: j .LBB61_142
+; CHECK-RV32-NEXT: j .LBB61_141
; CHECK-RV32-NEXT: .LBB61_667: # %cond.load525
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5679,9 +5425,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 32
-; CHECK-RV32-NEXT: bnez a2, .LBB61_668
-; CHECK-RV32-NEXT: j .LBB61_143
+; CHECK-RV32-NEXT: j .LBB61_142
; CHECK-RV32-NEXT: .LBB61_668: # %cond.load529
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5694,9 +5438,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 64
-; CHECK-RV32-NEXT: bnez a2, .LBB61_669
-; CHECK-RV32-NEXT: j .LBB61_144
+; CHECK-RV32-NEXT: j .LBB61_143
; CHECK-RV32-NEXT: .LBB61_669: # %cond.load533
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5709,9 +5451,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 128
-; CHECK-RV32-NEXT: bnez a2, .LBB61_670
-; CHECK-RV32-NEXT: j .LBB61_145
+; CHECK-RV32-NEXT: j .LBB61_144
; CHECK-RV32-NEXT: .LBB61_670: # %cond.load537
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5724,9 +5464,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 256
-; CHECK-RV32-NEXT: bnez a2, .LBB61_671
-; CHECK-RV32-NEXT: j .LBB61_146
+; CHECK-RV32-NEXT: j .LBB61_145
; CHECK-RV32-NEXT: .LBB61_671: # %cond.load541
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5739,9 +5477,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 512
-; CHECK-RV32-NEXT: bnez a2, .LBB61_672
-; CHECK-RV32-NEXT: j .LBB61_147
+; CHECK-RV32-NEXT: j .LBB61_146
; CHECK-RV32-NEXT: .LBB61_672: # %cond.load545
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5754,9 +5490,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 1024
-; CHECK-RV32-NEXT: bnez a2, .LBB61_673
-; CHECK-RV32-NEXT: j .LBB61_148
+; CHECK-RV32-NEXT: j .LBB61_147
; CHECK-RV32-NEXT: .LBB61_673: # %cond.load549
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5769,9 +5503,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 20
-; CHECK-RV32-NEXT: bltz a2, .LBB61_674
-; CHECK-RV32-NEXT: j .LBB61_149
+; CHECK-RV32-NEXT: j .LBB61_148
; CHECK-RV32-NEXT: .LBB61_674: # %cond.load553
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5784,9 +5516,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 19
-; CHECK-RV32-NEXT: bltz a2, .LBB61_675
-; CHECK-RV32-NEXT: j .LBB61_150
+; CHECK-RV32-NEXT: j .LBB61_149
; CHECK-RV32-NEXT: .LBB61_675: # %cond.load557
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5799,9 +5529,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 18
-; CHECK-RV32-NEXT: bltz a2, .LBB61_676
-; CHECK-RV32-NEXT: j .LBB61_151
+; CHECK-RV32-NEXT: j .LBB61_150
; CHECK-RV32-NEXT: .LBB61_676: # %cond.load561
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5814,9 +5542,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 17
-; CHECK-RV32-NEXT: bltz a2, .LBB61_677
-; CHECK-RV32-NEXT: j .LBB61_152
+; CHECK-RV32-NEXT: j .LBB61_151
; CHECK-RV32-NEXT: .LBB61_677: # %cond.load565
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5829,9 +5555,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 16
-; CHECK-RV32-NEXT: bltz a2, .LBB61_678
-; CHECK-RV32-NEXT: j .LBB61_153
+; CHECK-RV32-NEXT: j .LBB61_152
; CHECK-RV32-NEXT: .LBB61_678: # %cond.load569
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5844,9 +5568,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 15
-; CHECK-RV32-NEXT: bltz a2, .LBB61_679
-; CHECK-RV32-NEXT: j .LBB61_154
+; CHECK-RV32-NEXT: j .LBB61_153
; CHECK-RV32-NEXT: .LBB61_679: # %cond.load573
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5859,9 +5581,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 14
-; CHECK-RV32-NEXT: bltz a2, .LBB61_680
-; CHECK-RV32-NEXT: j .LBB61_155
+; CHECK-RV32-NEXT: j .LBB61_154
; CHECK-RV32-NEXT: .LBB61_680: # %cond.load577
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5874,9 +5594,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 13
-; CHECK-RV32-NEXT: bltz a2, .LBB61_681
-; CHECK-RV32-NEXT: j .LBB61_156
+; CHECK-RV32-NEXT: j .LBB61_155
; CHECK-RV32-NEXT: .LBB61_681: # %cond.load581
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5889,9 +5607,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 12
-; CHECK-RV32-NEXT: bltz a2, .LBB61_682
-; CHECK-RV32-NEXT: j .LBB61_157
+; CHECK-RV32-NEXT: j .LBB61_156
; CHECK-RV32-NEXT: .LBB61_682: # %cond.load585
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5904,9 +5620,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 11
-; CHECK-RV32-NEXT: bltz a2, .LBB61_683
-; CHECK-RV32-NEXT: j .LBB61_158
+; CHECK-RV32-NEXT: j .LBB61_157
; CHECK-RV32-NEXT: .LBB61_683: # %cond.load589
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5919,9 +5633,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 10
-; CHECK-RV32-NEXT: bltz a2, .LBB61_684
-; CHECK-RV32-NEXT: j .LBB61_159
+; CHECK-RV32-NEXT: j .LBB61_158
; CHECK-RV32-NEXT: .LBB61_684: # %cond.load593
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5934,9 +5646,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 9
-; CHECK-RV32-NEXT: bltz a2, .LBB61_685
-; CHECK-RV32-NEXT: j .LBB61_160
+; CHECK-RV32-NEXT: j .LBB61_159
; CHECK-RV32-NEXT: .LBB61_685: # %cond.load597
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5949,9 +5659,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 8
-; CHECK-RV32-NEXT: bltz a2, .LBB61_686
-; CHECK-RV32-NEXT: j .LBB61_161
+; CHECK-RV32-NEXT: j .LBB61_160
; CHECK-RV32-NEXT: .LBB61_686: # %cond.load601
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5964,9 +5672,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 7
-; CHECK-RV32-NEXT: bltz a2, .LBB61_687
-; CHECK-RV32-NEXT: j .LBB61_162
+; CHECK-RV32-NEXT: j .LBB61_161
; CHECK-RV32-NEXT: .LBB61_687: # %cond.load605
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5979,9 +5685,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 6
-; CHECK-RV32-NEXT: bltz a2, .LBB61_688
-; CHECK-RV32-NEXT: j .LBB61_163
+; CHECK-RV32-NEXT: j .LBB61_162
; CHECK-RV32-NEXT: .LBB61_688: # %cond.load609
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5994,9 +5698,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 5
-; CHECK-RV32-NEXT: bltz a2, .LBB61_689
-; CHECK-RV32-NEXT: j .LBB61_164
+; CHECK-RV32-NEXT: j .LBB61_163
; CHECK-RV32-NEXT: .LBB61_689: # %cond.load613
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6009,9 +5711,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 4
-; CHECK-RV32-NEXT: bltz a2, .LBB61_690
-; CHECK-RV32-NEXT: j .LBB61_165
+; CHECK-RV32-NEXT: j .LBB61_164
; CHECK-RV32-NEXT: .LBB61_690: # %cond.load617
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6024,9 +5724,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 3
-; CHECK-RV32-NEXT: bltz a2, .LBB61_691
-; CHECK-RV32-NEXT: j .LBB61_166
+; CHECK-RV32-NEXT: j .LBB61_165
; CHECK-RV32-NEXT: .LBB61_691: # %cond.load621
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6039,11 +5737,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 2
-; CHECK-RV32-NEXT: bgez a2, .LBB61_1029
-; CHECK-RV32-NEXT: j .LBB61_167
-; CHECK-RV32-NEXT: .LBB61_1029: # %cond.load621
-; CHECK-RV32-NEXT: j .LBB61_168
+; CHECK-RV32-NEXT: j .LBB61_166
; CHECK-RV32-NEXT: .LBB61_692: # %cond.load633
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -6055,9 +5749,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 1
-; CHECK-RV32-NEXT: bnez a3, .LBB61_693
-; CHECK-RV32-NEXT: j .LBB61_172
+; CHECK-RV32-NEXT: j .LBB61_171
; CHECK-RV32-NEXT: .LBB61_693: # %cond.load637
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6070,9 +5762,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 2
-; CHECK-RV32-NEXT: bnez a3, .LBB61_694
-; CHECK-RV32-NEXT: j .LBB61_173
+; CHECK-RV32-NEXT: j .LBB61_172
; CHECK-RV32-NEXT: .LBB61_694: # %cond.load641
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6085,9 +5775,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 4
-; CHECK-RV32-NEXT: bnez a3, .LBB61_695
-; CHECK-RV32-NEXT: j .LBB61_174
+; CHECK-RV32-NEXT: j .LBB61_173
; CHECK-RV32-NEXT: .LBB61_695: # %cond.load645
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6100,9 +5788,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 8
-; CHECK-RV32-NEXT: bnez a3, .LBB61_696
-; CHECK-RV32-NEXT: j .LBB61_175
+; CHECK-RV32-NEXT: j .LBB61_174
; CHECK-RV32-NEXT: .LBB61_696: # %cond.load649
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6115,9 +5801,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 16
-; CHECK-RV32-NEXT: bnez a3, .LBB61_697
-; CHECK-RV32-NEXT: j .LBB61_176
+; CHECK-RV32-NEXT: j .LBB61_175
; CHECK-RV32-NEXT: .LBB61_697: # %cond.load653
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6130,9 +5814,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 32
-; CHECK-RV32-NEXT: bnez a3, .LBB61_698
-; CHECK-RV32-NEXT: j .LBB61_177
+; CHECK-RV32-NEXT: j .LBB61_176
; CHECK-RV32-NEXT: .LBB61_698: # %cond.load657
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6145,9 +5827,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 64
-; CHECK-RV32-NEXT: bnez a3, .LBB61_699
-; CHECK-RV32-NEXT: j .LBB61_178
+; CHECK-RV32-NEXT: j .LBB61_177
; CHECK-RV32-NEXT: .LBB61_699: # %cond.load661
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6160,9 +5840,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 128
-; CHECK-RV32-NEXT: bnez a3, .LBB61_700
-; CHECK-RV32-NEXT: j .LBB61_179
+; CHECK-RV32-NEXT: j .LBB61_178
; CHECK-RV32-NEXT: .LBB61_700: # %cond.load665
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6175,9 +5853,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 256
-; CHECK-RV32-NEXT: bnez a3, .LBB61_701
-; CHECK-RV32-NEXT: j .LBB61_180
+; CHECK-RV32-NEXT: j .LBB61_179
; CHECK-RV32-NEXT: .LBB61_701: # %cond.load669
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6190,9 +5866,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 512
-; CHECK-RV32-NEXT: bnez a3, .LBB61_702
-; CHECK-RV32-NEXT: j .LBB61_181
+; CHECK-RV32-NEXT: j .LBB61_180
; CHECK-RV32-NEXT: .LBB61_702: # %cond.load673
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6205,9 +5879,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 1024
-; CHECK-RV32-NEXT: bnez a3, .LBB61_703
-; CHECK-RV32-NEXT: j .LBB61_182
+; CHECK-RV32-NEXT: j .LBB61_181
; CHECK-RV32-NEXT: .LBB61_703: # %cond.load677
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6220,9 +5892,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 20
-; CHECK-RV32-NEXT: bltz a3, .LBB61_704
-; CHECK-RV32-NEXT: j .LBB61_183
+; CHECK-RV32-NEXT: j .LBB61_182
; CHECK-RV32-NEXT: .LBB61_704: # %cond.load681
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6235,9 +5905,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 19
-; CHECK-RV32-NEXT: bltz a3, .LBB61_705
-; CHECK-RV32-NEXT: j .LBB61_184
+; CHECK-RV32-NEXT: j .LBB61_183
; CHECK-RV32-NEXT: .LBB61_705: # %cond.load685
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6250,9 +5918,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 18
-; CHECK-RV32-NEXT: bltz a3, .LBB61_706
-; CHECK-RV32-NEXT: j .LBB61_185
+; CHECK-RV32-NEXT: j .LBB61_184
; CHECK-RV32-NEXT: .LBB61_706: # %cond.load689
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6265,9 +5931,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 17
-; CHECK-RV32-NEXT: bltz a3, .LBB61_707
-; CHECK-RV32-NEXT: j .LBB61_186
+; CHECK-RV32-NEXT: j .LBB61_185
; CHECK-RV32-NEXT: .LBB61_707: # %cond.load693
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6280,9 +5944,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 16
-; CHECK-RV32-NEXT: bltz a3, .LBB61_708
-; CHECK-RV32-NEXT: j .LBB61_187
+; CHECK-RV32-NEXT: j .LBB61_186
; CHECK-RV32-NEXT: .LBB61_708: # %cond.load697
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6295,9 +5957,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 15
-; CHECK-RV32-NEXT: bltz a3, .LBB61_709
-; CHECK-RV32-NEXT: j .LBB61_188
+; CHECK-RV32-NEXT: j .LBB61_187
; CHECK-RV32-NEXT: .LBB61_709: # %cond.load701
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6310,9 +5970,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 14
-; CHECK-RV32-NEXT: bltz a3, .LBB61_710
-; CHECK-RV32-NEXT: j .LBB61_189
+; CHECK-RV32-NEXT: j .LBB61_188
; CHECK-RV32-NEXT: .LBB61_710: # %cond.load705
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6325,9 +5983,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 13
-; CHECK-RV32-NEXT: bltz a3, .LBB61_711
-; CHECK-RV32-NEXT: j .LBB61_190
+; CHECK-RV32-NEXT: j .LBB61_189
; CHECK-RV32-NEXT: .LBB61_711: # %cond.load709
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6340,9 +5996,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 12
-; CHECK-RV32-NEXT: bltz a3, .LBB61_712
-; CHECK-RV32-NEXT: j .LBB61_191
+; CHECK-RV32-NEXT: j .LBB61_190
; CHECK-RV32-NEXT: .LBB61_712: # %cond.load713
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6355,9 +6009,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 11
-; CHECK-RV32-NEXT: bltz a3, .LBB61_713
-; CHECK-RV32-NEXT: j .LBB61_192
+; CHECK-RV32-NEXT: j .LBB61_191
; CHECK-RV32-NEXT: .LBB61_713: # %cond.load717
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6370,9 +6022,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 10
-; CHECK-RV32-NEXT: bltz a3, .LBB61_714
-; CHECK-RV32-NEXT: j .LBB61_193
+; CHECK-RV32-NEXT: j .LBB61_192
; CHECK-RV32-NEXT: .LBB61_714: # %cond.load721
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6385,9 +6035,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 9
-; CHECK-RV32-NEXT: bltz a3, .LBB61_715
-; CHECK-RV32-NEXT: j .LBB61_194
+; CHECK-RV32-NEXT: j .LBB61_193
; CHECK-RV32-NEXT: .LBB61_715: # %cond.load725
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6400,9 +6048,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 8
-; CHECK-RV32-NEXT: bltz a3, .LBB61_716
-; CHECK-RV32-NEXT: j .LBB61_195
+; CHECK-RV32-NEXT: j .LBB61_194
; CHECK-RV32-NEXT: .LBB61_716: # %cond.load729
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6415,9 +6061,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 7
-; CHECK-RV32-NEXT: bltz a3, .LBB61_717
-; CHECK-RV32-NEXT: j .LBB61_196
+; CHECK-RV32-NEXT: j .LBB61_195
; CHECK-RV32-NEXT: .LBB61_717: # %cond.load733
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6430,9 +6074,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 6
-; CHECK-RV32-NEXT: bltz a3, .LBB61_718
-; CHECK-RV32-NEXT: j .LBB61_197
+; CHECK-RV32-NEXT: j .LBB61_196
; CHECK-RV32-NEXT: .LBB61_718: # %cond.load737
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6445,9 +6087,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 5
-; CHECK-RV32-NEXT: bltz a3, .LBB61_719
-; CHECK-RV32-NEXT: j .LBB61_198
+; CHECK-RV32-NEXT: j .LBB61_197
; CHECK-RV32-NEXT: .LBB61_719: # %cond.load741
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6460,9 +6100,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 4
-; CHECK-RV32-NEXT: bltz a3, .LBB61_720
-; CHECK-RV32-NEXT: j .LBB61_199
+; CHECK-RV32-NEXT: j .LBB61_198
; CHECK-RV32-NEXT: .LBB61_720: # %cond.load745
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6475,9 +6113,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 3
-; CHECK-RV32-NEXT: bltz a3, .LBB61_721
-; CHECK-RV32-NEXT: j .LBB61_200
+; CHECK-RV32-NEXT: j .LBB61_199
; CHECK-RV32-NEXT: .LBB61_721: # %cond.load749
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6490,11 +6126,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 2
-; CHECK-RV32-NEXT: bgez a3, .LBB61_1030
-; CHECK-RV32-NEXT: j .LBB61_201
-; CHECK-RV32-NEXT: .LBB61_1030: # %cond.load749
-; CHECK-RV32-NEXT: j .LBB61_202
+; CHECK-RV32-NEXT: j .LBB61_200
; CHECK-RV32-NEXT: .LBB61_722: # %cond.load761
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -6506,9 +6138,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 1
-; CHECK-RV32-NEXT: bnez a2, .LBB61_723
-; CHECK-RV32-NEXT: j .LBB61_206
+; CHECK-RV32-NEXT: j .LBB61_205
; CHECK-RV32-NEXT: .LBB61_723: # %cond.load765
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6521,9 +6151,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 2
-; CHECK-RV32-NEXT: bnez a2, .LBB61_724
-; CHECK-RV32-NEXT: j .LBB61_207
+; CHECK-RV32-NEXT: j .LBB61_206
; CHECK-RV32-NEXT: .LBB61_724: # %cond.load769
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6536,9 +6164,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 4
-; CHECK-RV32-NEXT: bnez a2, .LBB61_725
-; CHECK-RV32-NEXT: j .LBB61_208
+; CHECK-RV32-NEXT: j .LBB61_207
; CHECK-RV32-NEXT: .LBB61_725: # %cond.load773
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6551,9 +6177,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 8
-; CHECK-RV32-NEXT: bnez a2, .LBB61_726
-; CHECK-RV32-NEXT: j .LBB61_209
+; CHECK-RV32-NEXT: j .LBB61_208
; CHECK-RV32-NEXT: .LBB61_726: # %cond.load777
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6566,9 +6190,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 16
-; CHECK-RV32-NEXT: bnez a2, .LBB61_727
-; CHECK-RV32-NEXT: j .LBB61_210
+; CHECK-RV32-NEXT: j .LBB61_209
; CHECK-RV32-NEXT: .LBB61_727: # %cond.load781
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6581,9 +6203,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 32
-; CHECK-RV32-NEXT: bnez a2, .LBB61_728
-; CHECK-RV32-NEXT: j .LBB61_211
+; CHECK-RV32-NEXT: j .LBB61_210
; CHECK-RV32-NEXT: .LBB61_728: # %cond.load785
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6596,9 +6216,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 64
-; CHECK-RV32-NEXT: bnez a2, .LBB61_729
-; CHECK-RV32-NEXT: j .LBB61_212
+; CHECK-RV32-NEXT: j .LBB61_211
; CHECK-RV32-NEXT: .LBB61_729: # %cond.load789
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6611,9 +6229,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 128
-; CHECK-RV32-NEXT: bnez a2, .LBB61_730
-; CHECK-RV32-NEXT: j .LBB61_213
+; CHECK-RV32-NEXT: j .LBB61_212
; CHECK-RV32-NEXT: .LBB61_730: # %cond.load793
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6626,9 +6242,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 256
-; CHECK-RV32-NEXT: bnez a2, .LBB61_731
-; CHECK-RV32-NEXT: j .LBB61_214
+; CHECK-RV32-NEXT: j .LBB61_213
; CHECK-RV32-NEXT: .LBB61_731: # %cond.load797
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6641,9 +6255,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 512
-; CHECK-RV32-NEXT: bnez a2, .LBB61_732
-; CHECK-RV32-NEXT: j .LBB61_215
+; CHECK-RV32-NEXT: j .LBB61_214
; CHECK-RV32-NEXT: .LBB61_732: # %cond.load801
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6656,9 +6268,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 1024
-; CHECK-RV32-NEXT: bnez a2, .LBB61_733
-; CHECK-RV32-NEXT: j .LBB61_216
+; CHECK-RV32-NEXT: j .LBB61_215
; CHECK-RV32-NEXT: .LBB61_733: # %cond.load805
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6671,9 +6281,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 20
-; CHECK-RV32-NEXT: bltz a2, .LBB61_734
-; CHECK-RV32-NEXT: j .LBB61_217
+; CHECK-RV32-NEXT: j .LBB61_216
; CHECK-RV32-NEXT: .LBB61_734: # %cond.load809
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6686,9 +6294,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 19
-; CHECK-RV32-NEXT: bltz a2, .LBB61_735
-; CHECK-RV32-NEXT: j .LBB61_218
+; CHECK-RV32-NEXT: j .LBB61_217
; CHECK-RV32-NEXT: .LBB61_735: # %cond.load813
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6701,9 +6307,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 18
-; CHECK-RV32-NEXT: bltz a2, .LBB61_736
-; CHECK-RV32-NEXT: j .LBB61_219
+; CHECK-RV32-NEXT: j .LBB61_218
; CHECK-RV32-NEXT: .LBB61_736: # %cond.load817
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6716,9 +6320,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 17
-; CHECK-RV32-NEXT: bltz a2, .LBB61_737
-; CHECK-RV32-NEXT: j .LBB61_220
+; CHECK-RV32-NEXT: j .LBB61_219
; CHECK-RV32-NEXT: .LBB61_737: # %cond.load821
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6731,9 +6333,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 16
-; CHECK-RV32-NEXT: bltz a2, .LBB61_738
-; CHECK-RV32-NEXT: j .LBB61_221
+; CHECK-RV32-NEXT: j .LBB61_220
; CHECK-RV32-NEXT: .LBB61_738: # %cond.load825
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6746,9 +6346,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 15
-; CHECK-RV32-NEXT: bltz a2, .LBB61_739
-; CHECK-RV32-NEXT: j .LBB61_222
+; CHECK-RV32-NEXT: j .LBB61_221
; CHECK-RV32-NEXT: .LBB61_739: # %cond.load829
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6761,9 +6359,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 14
-; CHECK-RV32-NEXT: bltz a2, .LBB61_740
-; CHECK-RV32-NEXT: j .LBB61_223
+; CHECK-RV32-NEXT: j .LBB61_222
; CHECK-RV32-NEXT: .LBB61_740: # %cond.load833
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6776,9 +6372,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 13
-; CHECK-RV32-NEXT: bltz a2, .LBB61_741
-; CHECK-RV32-NEXT: j .LBB61_224
+; CHECK-RV32-NEXT: j .LBB61_223
; CHECK-RV32-NEXT: .LBB61_741: # %cond.load837
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6791,9 +6385,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 12
-; CHECK-RV32-NEXT: bltz a2, .LBB61_742
-; CHECK-RV32-NEXT: j .LBB61_225
+; CHECK-RV32-NEXT: j .LBB61_224
; CHECK-RV32-NEXT: .LBB61_742: # %cond.load841
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6806,9 +6398,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 11
-; CHECK-RV32-NEXT: bltz a2, .LBB61_743
-; CHECK-RV32-NEXT: j .LBB61_226
+; CHECK-RV32-NEXT: j .LBB61_225
; CHECK-RV32-NEXT: .LBB61_743: # %cond.load845
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6821,9 +6411,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 10
-; CHECK-RV32-NEXT: bltz a2, .LBB61_744
-; CHECK-RV32-NEXT: j .LBB61_227
+; CHECK-RV32-NEXT: j .LBB61_226
; CHECK-RV32-NEXT: .LBB61_744: # %cond.load849
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6836,9 +6424,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 9
-; CHECK-RV32-NEXT: bltz a2, .LBB61_745
-; CHECK-RV32-NEXT: j .LBB61_228
+; CHECK-RV32-NEXT: j .LBB61_227
; CHECK-RV32-NEXT: .LBB61_745: # %cond.load853
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6851,9 +6437,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 8
-; CHECK-RV32-NEXT: bltz a2, .LBB61_746
-; CHECK-RV32-NEXT: j .LBB61_229
+; CHECK-RV32-NEXT: j .LBB61_228
; CHECK-RV32-NEXT: .LBB61_746: # %cond.load857
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6866,9 +6450,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 7
-; CHECK-RV32-NEXT: bltz a2, .LBB61_747
-; CHECK-RV32-NEXT: j .LBB61_230
+; CHECK-RV32-NEXT: j .LBB61_229
; CHECK-RV32-NEXT: .LBB61_747: # %cond.load861
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6881,9 +6463,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 6
-; CHECK-RV32-NEXT: bltz a2, .LBB61_748
-; CHECK-RV32-NEXT: j .LBB61_231
+; CHECK-RV32-NEXT: j .LBB61_230
; CHECK-RV32-NEXT: .LBB61_748: # %cond.load865
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6896,9 +6476,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 5
-; CHECK-RV32-NEXT: bltz a2, .LBB61_749
-; CHECK-RV32-NEXT: j .LBB61_232
+; CHECK-RV32-NEXT: j .LBB61_231
; CHECK-RV32-NEXT: .LBB61_749: # %cond.load869
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6911,9 +6489,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 4
-; CHECK-RV32-NEXT: bltz a2, .LBB61_750
-; CHECK-RV32-NEXT: j .LBB61_233
+; CHECK-RV32-NEXT: j .LBB61_232
; CHECK-RV32-NEXT: .LBB61_750: # %cond.load873
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6926,9 +6502,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 3
-; CHECK-RV32-NEXT: bltz a2, .LBB61_751
-; CHECK-RV32-NEXT: j .LBB61_234
+; CHECK-RV32-NEXT: j .LBB61_233
; CHECK-RV32-NEXT: .LBB61_751: # %cond.load877
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6941,11 +6515,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a2, a3, 2
-; CHECK-RV32-NEXT: bgez a2, .LBB61_1031
-; CHECK-RV32-NEXT: j .LBB61_235
-; CHECK-RV32-NEXT: .LBB61_1031: # %cond.load877
-; CHECK-RV32-NEXT: j .LBB61_236
+; CHECK-RV32-NEXT: j .LBB61_234
; CHECK-RV32-NEXT: .LBB61_752: # %cond.load889
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -6957,9 +6527,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 1
-; CHECK-RV32-NEXT: bnez a3, .LBB61_753
-; CHECK-RV32-NEXT: j .LBB61_240
+; CHECK-RV32-NEXT: j .LBB61_239
; CHECK-RV32-NEXT: .LBB61_753: # %cond.load893
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6972,9 +6540,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 2
-; CHECK-RV32-NEXT: bnez a3, .LBB61_754
-; CHECK-RV32-NEXT: j .LBB61_241
+; CHECK-RV32-NEXT: j .LBB61_240
; CHECK-RV32-NEXT: .LBB61_754: # %cond.load897
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6987,9 +6553,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 4
-; CHECK-RV32-NEXT: bnez a3, .LBB61_755
-; CHECK-RV32-NEXT: j .LBB61_242
+; CHECK-RV32-NEXT: j .LBB61_241
; CHECK-RV32-NEXT: .LBB61_755: # %cond.load901
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7002,9 +6566,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 8
-; CHECK-RV32-NEXT: bnez a3, .LBB61_756
-; CHECK-RV32-NEXT: j .LBB61_243
+; CHECK-RV32-NEXT: j .LBB61_242
; CHECK-RV32-NEXT: .LBB61_756: # %cond.load905
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7017,9 +6579,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 16
-; CHECK-RV32-NEXT: bnez a3, .LBB61_757
-; CHECK-RV32-NEXT: j .LBB61_244
+; CHECK-RV32-NEXT: j .LBB61_243
; CHECK-RV32-NEXT: .LBB61_757: # %cond.load909
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7032,9 +6592,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 32
-; CHECK-RV32-NEXT: bnez a3, .LBB61_758
-; CHECK-RV32-NEXT: j .LBB61_245
+; CHECK-RV32-NEXT: j .LBB61_244
; CHECK-RV32-NEXT: .LBB61_758: # %cond.load913
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7047,9 +6605,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 64
-; CHECK-RV32-NEXT: bnez a3, .LBB61_759
-; CHECK-RV32-NEXT: j .LBB61_246
+; CHECK-RV32-NEXT: j .LBB61_245
; CHECK-RV32-NEXT: .LBB61_759: # %cond.load917
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7062,9 +6618,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 128
-; CHECK-RV32-NEXT: bnez a3, .LBB61_760
-; CHECK-RV32-NEXT: j .LBB61_247
+; CHECK-RV32-NEXT: j .LBB61_246
; CHECK-RV32-NEXT: .LBB61_760: # %cond.load921
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7077,9 +6631,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 256
-; CHECK-RV32-NEXT: bnez a3, .LBB61_761
-; CHECK-RV32-NEXT: j .LBB61_248
+; CHECK-RV32-NEXT: j .LBB61_247
; CHECK-RV32-NEXT: .LBB61_761: # %cond.load925
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7092,9 +6644,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 512
-; CHECK-RV32-NEXT: bnez a3, .LBB61_762
-; CHECK-RV32-NEXT: j .LBB61_249
+; CHECK-RV32-NEXT: j .LBB61_248
; CHECK-RV32-NEXT: .LBB61_762: # %cond.load929
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7107,9 +6657,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a3, a2, 1024
-; CHECK-RV32-NEXT: bnez a3, .LBB61_763
-; CHECK-RV32-NEXT: j .LBB61_250
+; CHECK-RV32-NEXT: j .LBB61_249
; CHECK-RV32-NEXT: .LBB61_763: # %cond.load933
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7122,9 +6670,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 20
-; CHECK-RV32-NEXT: bltz a3, .LBB61_764
-; CHECK-RV32-NEXT: j .LBB61_251
+; CHECK-RV32-NEXT: j .LBB61_250
; CHECK-RV32-NEXT: .LBB61_764: # %cond.load937
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7137,9 +6683,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 19
-; CHECK-RV32-NEXT: bltz a3, .LBB61_765
-; CHECK-RV32-NEXT: j .LBB61_252
+; CHECK-RV32-NEXT: j .LBB61_251
; CHECK-RV32-NEXT: .LBB61_765: # %cond.load941
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7152,9 +6696,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 18
-; CHECK-RV32-NEXT: bltz a3, .LBB61_766
-; CHECK-RV32-NEXT: j .LBB61_253
+; CHECK-RV32-NEXT: j .LBB61_252
; CHECK-RV32-NEXT: .LBB61_766: # %cond.load945
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7167,9 +6709,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 17
-; CHECK-RV32-NEXT: bltz a3, .LBB61_767
-; CHECK-RV32-NEXT: j .LBB61_254
+; CHECK-RV32-NEXT: j .LBB61_253
; CHECK-RV32-NEXT: .LBB61_767: # %cond.load949
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7182,9 +6722,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 16
-; CHECK-RV32-NEXT: bltz a3, .LBB61_768
-; CHECK-RV32-NEXT: j .LBB61_255
+; CHECK-RV32-NEXT: j .LBB61_254
; CHECK-RV32-NEXT: .LBB61_768: # %cond.load953
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7197,9 +6735,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 15
-; CHECK-RV32-NEXT: bltz a3, .LBB61_769
-; CHECK-RV32-NEXT: j .LBB61_256
+; CHECK-RV32-NEXT: j .LBB61_255
; CHECK-RV32-NEXT: .LBB61_769: # %cond.load957
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7212,9 +6748,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 14
-; CHECK-RV32-NEXT: bltz a3, .LBB61_770
-; CHECK-RV32-NEXT: j .LBB61_257
+; CHECK-RV32-NEXT: j .LBB61_256
; CHECK-RV32-NEXT: .LBB61_770: # %cond.load961
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7227,9 +6761,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 13
-; CHECK-RV32-NEXT: bltz a3, .LBB61_771
-; CHECK-RV32-NEXT: j .LBB61_258
+; CHECK-RV32-NEXT: j .LBB61_257
; CHECK-RV32-NEXT: .LBB61_771: # %cond.load965
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7242,9 +6774,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 12
-; CHECK-RV32-NEXT: bltz a3, .LBB61_772
-; CHECK-RV32-NEXT: j .LBB61_259
+; CHECK-RV32-NEXT: j .LBB61_258
; CHECK-RV32-NEXT: .LBB61_772: # %cond.load969
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7257,9 +6787,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 11
-; CHECK-RV32-NEXT: bltz a3, .LBB61_773
-; CHECK-RV32-NEXT: j .LBB61_260
+; CHECK-RV32-NEXT: j .LBB61_259
; CHECK-RV32-NEXT: .LBB61_773: # %cond.load973
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7272,9 +6800,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 10
-; CHECK-RV32-NEXT: bltz a3, .LBB61_774
-; CHECK-RV32-NEXT: j .LBB61_261
+; CHECK-RV32-NEXT: j .LBB61_260
; CHECK-RV32-NEXT: .LBB61_774: # %cond.load977
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7287,9 +6813,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 9
-; CHECK-RV32-NEXT: bltz a3, .LBB61_775
-; CHECK-RV32-NEXT: j .LBB61_262
+; CHECK-RV32-NEXT: j .LBB61_261
; CHECK-RV32-NEXT: .LBB61_775: # %cond.load981
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7302,9 +6826,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 8
-; CHECK-RV32-NEXT: bltz a3, .LBB61_776
-; CHECK-RV32-NEXT: j .LBB61_263
+; CHECK-RV32-NEXT: j .LBB61_262
; CHECK-RV32-NEXT: .LBB61_776: # %cond.load985
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7317,9 +6839,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 7
-; CHECK-RV32-NEXT: bltz a3, .LBB61_777
-; CHECK-RV32-NEXT: j .LBB61_264
+; CHECK-RV32-NEXT: j .LBB61_263
; CHECK-RV32-NEXT: .LBB61_777: # %cond.load989
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7332,9 +6852,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 6
-; CHECK-RV32-NEXT: bltz a3, .LBB61_778
-; CHECK-RV32-NEXT: j .LBB61_265
+; CHECK-RV32-NEXT: j .LBB61_264
; CHECK-RV32-NEXT: .LBB61_778: # %cond.load993
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7347,9 +6865,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 5
-; CHECK-RV32-NEXT: bltz a3, .LBB61_779
-; CHECK-RV32-NEXT: j .LBB61_266
+; CHECK-RV32-NEXT: j .LBB61_265
; CHECK-RV32-NEXT: .LBB61_779: # %cond.load997
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7362,9 +6878,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 4
-; CHECK-RV32-NEXT: bltz a3, .LBB61_780
-; CHECK-RV32-NEXT: j .LBB61_267
+; CHECK-RV32-NEXT: j .LBB61_266
; CHECK-RV32-NEXT: .LBB61_780: # %cond.load1001
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7377,9 +6891,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 3
-; CHECK-RV32-NEXT: bltz a3, .LBB61_781
-; CHECK-RV32-NEXT: j .LBB61_268
+; CHECK-RV32-NEXT: j .LBB61_267
; CHECK-RV32-NEXT: .LBB61_781: # %cond.load1005
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -7392,11 +6904,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: slli a3, a2, 2
-; CHECK-RV32-NEXT: bgez a3, .LBB61_1032
-; CHECK-RV32-NEXT: j .LBB61_269
-; CHECK-RV32-NEXT: .LBB61_1032: # %cond.load1005
-; CHECK-RV32-NEXT: j .LBB61_270
+; CHECK-RV32-NEXT: j .LBB61_268
; CHECK-RV32-NEXT: .LBB61_782: # %cond.load1017
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -7408,9 +6916,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: andi a2, a3, 1
-; CHECK-RV32-NEXT: bnez a2, .LBB61_783
-; CHECK-RV32-NEXT: j .LBB61_274
+; CHECK-RV32-NEXT: j .LBB61_273
; CHECK-RV32-NEXT: .LBB61_783: # %cond.load1021
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7421,9 +6927,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 2
-; CHECK-RV32-NEXT: bnez a2, .LBB61_784
-; CHECK-RV32-NEXT: j .LBB61_275
+; CHECK-RV32-NEXT: j .LBB61_274
; CHECK-RV32-NEXT: .LBB61_784: # %cond.load1025
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7434,9 +6938,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 4
-; CHECK-RV32-NEXT: bnez a2, .LBB61_785
-; CHECK-RV32-NEXT: j .LBB61_276
+; CHECK-RV32-NEXT: j .LBB61_275
; CHECK-RV32-NEXT: .LBB61_785: # %cond.load1029
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7447,9 +6949,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 8
-; CHECK-RV32-NEXT: bnez a2, .LBB61_786
-; CHECK-RV32-NEXT: j .LBB61_277
+; CHECK-RV32-NEXT: j .LBB61_276
; CHECK-RV32-NEXT: .LBB61_786: # %cond.load1033
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7460,9 +6960,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 16
-; CHECK-RV32-NEXT: bnez a2, .LBB61_787
-; CHECK-RV32-NEXT: j .LBB61_278
+; CHECK-RV32-NEXT: j .LBB61_277
; CHECK-RV32-NEXT: .LBB61_787: # %cond.load1037
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7473,9 +6971,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 32
-; CHECK-RV32-NEXT: bnez a2, .LBB61_788
-; CHECK-RV32-NEXT: j .LBB61_279
+; CHECK-RV32-NEXT: j .LBB61_278
; CHECK-RV32-NEXT: .LBB61_788: # %cond.load1041
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7486,9 +6982,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 64
-; CHECK-RV32-NEXT: bnez a2, .LBB61_789
-; CHECK-RV32-NEXT: j .LBB61_280
+; CHECK-RV32-NEXT: j .LBB61_279
; CHECK-RV32-NEXT: .LBB61_789: # %cond.load1045
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7499,9 +6993,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 128
-; CHECK-RV32-NEXT: bnez a2, .LBB61_790
-; CHECK-RV32-NEXT: j .LBB61_281
+; CHECK-RV32-NEXT: j .LBB61_280
; CHECK-RV32-NEXT: .LBB61_790: # %cond.load1049
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7512,9 +7004,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 256
-; CHECK-RV32-NEXT: bnez a2, .LBB61_791
-; CHECK-RV32-NEXT: j .LBB61_282
+; CHECK-RV32-NEXT: j .LBB61_281
; CHECK-RV32-NEXT: .LBB61_791: # %cond.load1053
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7525,9 +7015,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 512
-; CHECK-RV32-NEXT: bnez a2, .LBB61_792
-; CHECK-RV32-NEXT: j .LBB61_283
+; CHECK-RV32-NEXT: j .LBB61_282
; CHECK-RV32-NEXT: .LBB61_792: # %cond.load1057
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7538,9 +7026,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 1024
-; CHECK-RV32-NEXT: bnez a2, .LBB61_793
-; CHECK-RV32-NEXT: j .LBB61_284
+; CHECK-RV32-NEXT: j .LBB61_283
; CHECK-RV32-NEXT: .LBB61_793: # %cond.load1061
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7551,9 +7037,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 20
-; CHECK-RV32-NEXT: bltz a2, .LBB61_794
-; CHECK-RV32-NEXT: j .LBB61_285
+; CHECK-RV32-NEXT: j .LBB61_284
; CHECK-RV32-NEXT: .LBB61_794: # %cond.load1065
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7564,9 +7048,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 19
-; CHECK-RV32-NEXT: bltz a2, .LBB61_795
-; CHECK-RV32-NEXT: j .LBB61_286
+; CHECK-RV32-NEXT: j .LBB61_285
; CHECK-RV32-NEXT: .LBB61_795: # %cond.load1069
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7577,9 +7059,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 18
-; CHECK-RV32-NEXT: bltz a2, .LBB61_796
-; CHECK-RV32-NEXT: j .LBB61_287
+; CHECK-RV32-NEXT: j .LBB61_286
; CHECK-RV32-NEXT: .LBB61_796: # %cond.load1073
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7590,9 +7070,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 17
-; CHECK-RV32-NEXT: bltz a2, .LBB61_797
-; CHECK-RV32-NEXT: j .LBB61_288
+; CHECK-RV32-NEXT: j .LBB61_287
; CHECK-RV32-NEXT: .LBB61_797: # %cond.load1077
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7603,9 +7081,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 16
-; CHECK-RV32-NEXT: bltz a2, .LBB61_798
-; CHECK-RV32-NEXT: j .LBB61_289
+; CHECK-RV32-NEXT: j .LBB61_288
; CHECK-RV32-NEXT: .LBB61_798: # %cond.load1081
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7616,9 +7092,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 15
-; CHECK-RV32-NEXT: bltz a2, .LBB61_799
-; CHECK-RV32-NEXT: j .LBB61_290
+; CHECK-RV32-NEXT: j .LBB61_289
; CHECK-RV32-NEXT: .LBB61_799: # %cond.load1085
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7629,9 +7103,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 14
-; CHECK-RV32-NEXT: bltz a2, .LBB61_800
-; CHECK-RV32-NEXT: j .LBB61_291
+; CHECK-RV32-NEXT: j .LBB61_290
; CHECK-RV32-NEXT: .LBB61_800: # %cond.load1089
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7642,9 +7114,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 13
-; CHECK-RV32-NEXT: bltz a2, .LBB61_801
-; CHECK-RV32-NEXT: j .LBB61_292
+; CHECK-RV32-NEXT: j .LBB61_291
; CHECK-RV32-NEXT: .LBB61_801: # %cond.load1093
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7655,9 +7125,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 12
-; CHECK-RV32-NEXT: bltz a2, .LBB61_802
-; CHECK-RV32-NEXT: j .LBB61_293
+; CHECK-RV32-NEXT: j .LBB61_292
; CHECK-RV32-NEXT: .LBB61_802: # %cond.load1097
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7668,9 +7136,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 11
-; CHECK-RV32-NEXT: bltz a2, .LBB61_803
-; CHECK-RV32-NEXT: j .LBB61_294
+; CHECK-RV32-NEXT: j .LBB61_293
; CHECK-RV32-NEXT: .LBB61_803: # %cond.load1101
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7681,9 +7147,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 10
-; CHECK-RV32-NEXT: bltz a2, .LBB61_804
-; CHECK-RV32-NEXT: j .LBB61_295
+; CHECK-RV32-NEXT: j .LBB61_294
; CHECK-RV32-NEXT: .LBB61_804: # %cond.load1105
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7694,9 +7158,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 9
-; CHECK-RV32-NEXT: bltz a2, .LBB61_805
-; CHECK-RV32-NEXT: j .LBB61_296
+; CHECK-RV32-NEXT: j .LBB61_295
; CHECK-RV32-NEXT: .LBB61_805: # %cond.load1109
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7707,9 +7169,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 8
-; CHECK-RV32-NEXT: bltz a2, .LBB61_806
-; CHECK-RV32-NEXT: j .LBB61_297
+; CHECK-RV32-NEXT: j .LBB61_296
; CHECK-RV32-NEXT: .LBB61_806: # %cond.load1113
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7720,9 +7180,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 7
-; CHECK-RV32-NEXT: bltz a2, .LBB61_807
-; CHECK-RV32-NEXT: j .LBB61_298
+; CHECK-RV32-NEXT: j .LBB61_297
; CHECK-RV32-NEXT: .LBB61_807: # %cond.load1117
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7733,9 +7191,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 6
-; CHECK-RV32-NEXT: bltz a2, .LBB61_808
-; CHECK-RV32-NEXT: j .LBB61_299
+; CHECK-RV32-NEXT: j .LBB61_298
; CHECK-RV32-NEXT: .LBB61_808: # %cond.load1121
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7746,9 +7202,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 5
-; CHECK-RV32-NEXT: bltz a2, .LBB61_809
-; CHECK-RV32-NEXT: j .LBB61_300
+; CHECK-RV32-NEXT: j .LBB61_299
; CHECK-RV32-NEXT: .LBB61_809: # %cond.load1125
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7759,9 +7213,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 4
-; CHECK-RV32-NEXT: bltz a2, .LBB61_810
-; CHECK-RV32-NEXT: j .LBB61_301
+; CHECK-RV32-NEXT: j .LBB61_300
; CHECK-RV32-NEXT: .LBB61_810: # %cond.load1129
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7772,9 +7224,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 3
-; CHECK-RV32-NEXT: bltz a2, .LBB61_811
-; CHECK-RV32-NEXT: j .LBB61_302
+; CHECK-RV32-NEXT: j .LBB61_301
; CHECK-RV32-NEXT: .LBB61_811: # %cond.load1133
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7785,11 +7235,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 2
-; CHECK-RV32-NEXT: bgez a2, .LBB61_1033
-; CHECK-RV32-NEXT: j .LBB61_303
-; CHECK-RV32-NEXT: .LBB61_1033: # %cond.load1133
-; CHECK-RV32-NEXT: j .LBB61_304
+; CHECK-RV32-NEXT: j .LBB61_302
; CHECK-RV32-NEXT: .LBB61_812: # %cond.load1145
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vmv.s.x v16, a3
@@ -7798,9 +7244,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 1
-; CHECK-RV32-NEXT: bnez a3, .LBB61_813
-; CHECK-RV32-NEXT: j .LBB61_308
+; CHECK-RV32-NEXT: j .LBB61_307
; CHECK-RV32-NEXT: .LBB61_813: # %cond.load1149
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7811,9 +7255,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 2
-; CHECK-RV32-NEXT: bnez a3, .LBB61_814
-; CHECK-RV32-NEXT: j .LBB61_309
+; CHECK-RV32-NEXT: j .LBB61_308
; CHECK-RV32-NEXT: .LBB61_814: # %cond.load1153
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7824,9 +7266,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 4
-; CHECK-RV32-NEXT: bnez a3, .LBB61_815
-; CHECK-RV32-NEXT: j .LBB61_310
+; CHECK-RV32-NEXT: j .LBB61_309
; CHECK-RV32-NEXT: .LBB61_815: # %cond.load1157
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7837,9 +7277,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 8
-; CHECK-RV32-NEXT: bnez a3, .LBB61_816
-; CHECK-RV32-NEXT: j .LBB61_311
+; CHECK-RV32-NEXT: j .LBB61_310
; CHECK-RV32-NEXT: .LBB61_816: # %cond.load1161
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7850,9 +7288,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 16
-; CHECK-RV32-NEXT: bnez a3, .LBB61_817
-; CHECK-RV32-NEXT: j .LBB61_312
+; CHECK-RV32-NEXT: j .LBB61_311
; CHECK-RV32-NEXT: .LBB61_817: # %cond.load1165
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7863,9 +7299,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 32
-; CHECK-RV32-NEXT: bnez a3, .LBB61_818
-; CHECK-RV32-NEXT: j .LBB61_313
+; CHECK-RV32-NEXT: j .LBB61_312
; CHECK-RV32-NEXT: .LBB61_818: # %cond.load1169
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7876,9 +7310,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 64
-; CHECK-RV32-NEXT: bnez a3, .LBB61_819
-; CHECK-RV32-NEXT: j .LBB61_314
+; CHECK-RV32-NEXT: j .LBB61_313
; CHECK-RV32-NEXT: .LBB61_819: # %cond.load1173
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7889,9 +7321,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 128
-; CHECK-RV32-NEXT: bnez a3, .LBB61_820
-; CHECK-RV32-NEXT: j .LBB61_315
+; CHECK-RV32-NEXT: j .LBB61_314
; CHECK-RV32-NEXT: .LBB61_820: # %cond.load1177
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7902,9 +7332,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 256
-; CHECK-RV32-NEXT: bnez a3, .LBB61_821
-; CHECK-RV32-NEXT: j .LBB61_316
+; CHECK-RV32-NEXT: j .LBB61_315
; CHECK-RV32-NEXT: .LBB61_821: # %cond.load1181
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7915,9 +7343,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 512
-; CHECK-RV32-NEXT: bnez a3, .LBB61_822
-; CHECK-RV32-NEXT: j .LBB61_317
+; CHECK-RV32-NEXT: j .LBB61_316
; CHECK-RV32-NEXT: .LBB61_822: # %cond.load1185
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7928,9 +7354,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 1024
-; CHECK-RV32-NEXT: bnez a3, .LBB61_823
-; CHECK-RV32-NEXT: j .LBB61_318
+; CHECK-RV32-NEXT: j .LBB61_317
; CHECK-RV32-NEXT: .LBB61_823: # %cond.load1189
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7941,9 +7365,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 20
-; CHECK-RV32-NEXT: bltz a3, .LBB61_824
-; CHECK-RV32-NEXT: j .LBB61_319
+; CHECK-RV32-NEXT: j .LBB61_318
; CHECK-RV32-NEXT: .LBB61_824: # %cond.load1193
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7954,9 +7376,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 19
-; CHECK-RV32-NEXT: bltz a3, .LBB61_825
-; CHECK-RV32-NEXT: j .LBB61_320
+; CHECK-RV32-NEXT: j .LBB61_319
; CHECK-RV32-NEXT: .LBB61_825: # %cond.load1197
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7967,9 +7387,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 18
-; CHECK-RV32-NEXT: bltz a3, .LBB61_826
-; CHECK-RV32-NEXT: j .LBB61_321
+; CHECK-RV32-NEXT: j .LBB61_320
; CHECK-RV32-NEXT: .LBB61_826: # %cond.load1201
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7980,9 +7398,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 17
-; CHECK-RV32-NEXT: bltz a3, .LBB61_827
-; CHECK-RV32-NEXT: j .LBB61_322
+; CHECK-RV32-NEXT: j .LBB61_321
; CHECK-RV32-NEXT: .LBB61_827: # %cond.load1205
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7993,9 +7409,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 16
-; CHECK-RV32-NEXT: bltz a3, .LBB61_828
-; CHECK-RV32-NEXT: j .LBB61_323
+; CHECK-RV32-NEXT: j .LBB61_322
; CHECK-RV32-NEXT: .LBB61_828: # %cond.load1209
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8006,9 +7420,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 15
-; CHECK-RV32-NEXT: bltz a3, .LBB61_829
-; CHECK-RV32-NEXT: j .LBB61_324
+; CHECK-RV32-NEXT: j .LBB61_323
; CHECK-RV32-NEXT: .LBB61_829: # %cond.load1213
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8019,9 +7431,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 14
-; CHECK-RV32-NEXT: bltz a3, .LBB61_830
-; CHECK-RV32-NEXT: j .LBB61_325
+; CHECK-RV32-NEXT: j .LBB61_324
; CHECK-RV32-NEXT: .LBB61_830: # %cond.load1217
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8032,9 +7442,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 13
-; CHECK-RV32-NEXT: bltz a3, .LBB61_831
-; CHECK-RV32-NEXT: j .LBB61_326
+; CHECK-RV32-NEXT: j .LBB61_325
; CHECK-RV32-NEXT: .LBB61_831: # %cond.load1221
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8045,9 +7453,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 12
-; CHECK-RV32-NEXT: bltz a3, .LBB61_832
-; CHECK-RV32-NEXT: j .LBB61_327
+; CHECK-RV32-NEXT: j .LBB61_326
; CHECK-RV32-NEXT: .LBB61_832: # %cond.load1225
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8058,9 +7464,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 11
-; CHECK-RV32-NEXT: bltz a3, .LBB61_833
-; CHECK-RV32-NEXT: j .LBB61_328
+; CHECK-RV32-NEXT: j .LBB61_327
; CHECK-RV32-NEXT: .LBB61_833: # %cond.load1229
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8071,9 +7475,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 10
-; CHECK-RV32-NEXT: bltz a3, .LBB61_834
-; CHECK-RV32-NEXT: j .LBB61_329
+; CHECK-RV32-NEXT: j .LBB61_328
; CHECK-RV32-NEXT: .LBB61_834: # %cond.load1233
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8084,9 +7486,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 9
-; CHECK-RV32-NEXT: bltz a3, .LBB61_835
-; CHECK-RV32-NEXT: j .LBB61_330
+; CHECK-RV32-NEXT: j .LBB61_329
; CHECK-RV32-NEXT: .LBB61_835: # %cond.load1237
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8097,9 +7497,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 8
-; CHECK-RV32-NEXT: bltz a3, .LBB61_836
-; CHECK-RV32-NEXT: j .LBB61_331
+; CHECK-RV32-NEXT: j .LBB61_330
; CHECK-RV32-NEXT: .LBB61_836: # %cond.load1241
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8110,9 +7508,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 7
-; CHECK-RV32-NEXT: bltz a3, .LBB61_837
-; CHECK-RV32-NEXT: j .LBB61_332
+; CHECK-RV32-NEXT: j .LBB61_331
; CHECK-RV32-NEXT: .LBB61_837: # %cond.load1245
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8123,9 +7519,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 6
-; CHECK-RV32-NEXT: bltz a3, .LBB61_838
-; CHECK-RV32-NEXT: j .LBB61_333
+; CHECK-RV32-NEXT: j .LBB61_332
; CHECK-RV32-NEXT: .LBB61_838: # %cond.load1249
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8136,9 +7530,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 5
-; CHECK-RV32-NEXT: bltz a3, .LBB61_839
-; CHECK-RV32-NEXT: j .LBB61_334
+; CHECK-RV32-NEXT: j .LBB61_333
; CHECK-RV32-NEXT: .LBB61_839: # %cond.load1253
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8149,9 +7541,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 4
-; CHECK-RV32-NEXT: bltz a3, .LBB61_840
-; CHECK-RV32-NEXT: j .LBB61_335
+; CHECK-RV32-NEXT: j .LBB61_334
; CHECK-RV32-NEXT: .LBB61_840: # %cond.load1257
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8162,9 +7552,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 3
-; CHECK-RV32-NEXT: bltz a3, .LBB61_841
-; CHECK-RV32-NEXT: j .LBB61_336
+; CHECK-RV32-NEXT: j .LBB61_335
; CHECK-RV32-NEXT: .LBB61_841: # %cond.load1261
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8175,11 +7563,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 2
-; CHECK-RV32-NEXT: bgez a3, .LBB61_1034
-; CHECK-RV32-NEXT: j .LBB61_337
-; CHECK-RV32-NEXT: .LBB61_1034: # %cond.load1261
-; CHECK-RV32-NEXT: j .LBB61_338
+; CHECK-RV32-NEXT: j .LBB61_336
; CHECK-RV32-NEXT: .LBB61_842: # %cond.load1273
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vmv.s.x v16, a2
@@ -8188,9 +7572,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 1
-; CHECK-RV32-NEXT: bnez a2, .LBB61_843
-; CHECK-RV32-NEXT: j .LBB61_342
+; CHECK-RV32-NEXT: j .LBB61_341
; CHECK-RV32-NEXT: .LBB61_843: # %cond.load1277
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8201,9 +7583,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 2
-; CHECK-RV32-NEXT: bnez a2, .LBB61_844
-; CHECK-RV32-NEXT: j .LBB61_343
+; CHECK-RV32-NEXT: j .LBB61_342
; CHECK-RV32-NEXT: .LBB61_844: # %cond.load1281
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8214,9 +7594,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 4
-; CHECK-RV32-NEXT: bnez a2, .LBB61_845
-; CHECK-RV32-NEXT: j .LBB61_344
+; CHECK-RV32-NEXT: j .LBB61_343
; CHECK-RV32-NEXT: .LBB61_845: # %cond.load1285
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8227,9 +7605,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 8
-; CHECK-RV32-NEXT: bnez a2, .LBB61_846
-; CHECK-RV32-NEXT: j .LBB61_345
+; CHECK-RV32-NEXT: j .LBB61_344
; CHECK-RV32-NEXT: .LBB61_846: # %cond.load1289
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8240,9 +7616,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 16
-; CHECK-RV32-NEXT: bnez a2, .LBB61_847
-; CHECK-RV32-NEXT: j .LBB61_346
+; CHECK-RV32-NEXT: j .LBB61_345
; CHECK-RV32-NEXT: .LBB61_847: # %cond.load1293
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8253,9 +7627,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 32
-; CHECK-RV32-NEXT: bnez a2, .LBB61_848
-; CHECK-RV32-NEXT: j .LBB61_347
+; CHECK-RV32-NEXT: j .LBB61_346
; CHECK-RV32-NEXT: .LBB61_848: # %cond.load1297
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8266,9 +7638,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 64
-; CHECK-RV32-NEXT: bnez a2, .LBB61_849
-; CHECK-RV32-NEXT: j .LBB61_348
+; CHECK-RV32-NEXT: j .LBB61_347
; CHECK-RV32-NEXT: .LBB61_849: # %cond.load1301
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8279,9 +7649,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 128
-; CHECK-RV32-NEXT: bnez a2, .LBB61_850
-; CHECK-RV32-NEXT: j .LBB61_349
+; CHECK-RV32-NEXT: j .LBB61_348
; CHECK-RV32-NEXT: .LBB61_850: # %cond.load1305
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8292,9 +7660,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 256
-; CHECK-RV32-NEXT: bnez a2, .LBB61_851
-; CHECK-RV32-NEXT: j .LBB61_350
+; CHECK-RV32-NEXT: j .LBB61_349
; CHECK-RV32-NEXT: .LBB61_851: # %cond.load1309
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8305,9 +7671,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 512
-; CHECK-RV32-NEXT: bnez a2, .LBB61_852
-; CHECK-RV32-NEXT: j .LBB61_351
+; CHECK-RV32-NEXT: j .LBB61_350
; CHECK-RV32-NEXT: .LBB61_852: # %cond.load1313
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8318,9 +7682,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 1024
-; CHECK-RV32-NEXT: bnez a2, .LBB61_853
-; CHECK-RV32-NEXT: j .LBB61_352
+; CHECK-RV32-NEXT: j .LBB61_351
; CHECK-RV32-NEXT: .LBB61_853: # %cond.load1317
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8331,9 +7693,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 20
-; CHECK-RV32-NEXT: bltz a2, .LBB61_854
-; CHECK-RV32-NEXT: j .LBB61_353
+; CHECK-RV32-NEXT: j .LBB61_352
; CHECK-RV32-NEXT: .LBB61_854: # %cond.load1321
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8344,9 +7704,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 19
-; CHECK-RV32-NEXT: bltz a2, .LBB61_855
-; CHECK-RV32-NEXT: j .LBB61_354
+; CHECK-RV32-NEXT: j .LBB61_353
; CHECK-RV32-NEXT: .LBB61_855: # %cond.load1325
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8357,9 +7715,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 18
-; CHECK-RV32-NEXT: bltz a2, .LBB61_856
-; CHECK-RV32-NEXT: j .LBB61_355
+; CHECK-RV32-NEXT: j .LBB61_354
; CHECK-RV32-NEXT: .LBB61_856: # %cond.load1329
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8370,9 +7726,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 17
-; CHECK-RV32-NEXT: bltz a2, .LBB61_857
-; CHECK-RV32-NEXT: j .LBB61_356
+; CHECK-RV32-NEXT: j .LBB61_355
; CHECK-RV32-NEXT: .LBB61_857: # %cond.load1333
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8383,9 +7737,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 16
-; CHECK-RV32-NEXT: bltz a2, .LBB61_858
-; CHECK-RV32-NEXT: j .LBB61_357
+; CHECK-RV32-NEXT: j .LBB61_356
; CHECK-RV32-NEXT: .LBB61_858: # %cond.load1337
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8396,9 +7748,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 15
-; CHECK-RV32-NEXT: bltz a2, .LBB61_859
-; CHECK-RV32-NEXT: j .LBB61_358
+; CHECK-RV32-NEXT: j .LBB61_357
; CHECK-RV32-NEXT: .LBB61_859: # %cond.load1341
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8409,9 +7759,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 14
-; CHECK-RV32-NEXT: bltz a2, .LBB61_860
-; CHECK-RV32-NEXT: j .LBB61_359
+; CHECK-RV32-NEXT: j .LBB61_358
; CHECK-RV32-NEXT: .LBB61_860: # %cond.load1345
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8422,9 +7770,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 13
-; CHECK-RV32-NEXT: bltz a2, .LBB61_861
-; CHECK-RV32-NEXT: j .LBB61_360
+; CHECK-RV32-NEXT: j .LBB61_359
; CHECK-RV32-NEXT: .LBB61_861: # %cond.load1349
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8435,9 +7781,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 12
-; CHECK-RV32-NEXT: bltz a2, .LBB61_862
-; CHECK-RV32-NEXT: j .LBB61_361
+; CHECK-RV32-NEXT: j .LBB61_360
; CHECK-RV32-NEXT: .LBB61_862: # %cond.load1353
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8448,9 +7792,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 11
-; CHECK-RV32-NEXT: bltz a2, .LBB61_863
-; CHECK-RV32-NEXT: j .LBB61_362
+; CHECK-RV32-NEXT: j .LBB61_361
; CHECK-RV32-NEXT: .LBB61_863: # %cond.load1357
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8461,9 +7803,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 10
-; CHECK-RV32-NEXT: bltz a2, .LBB61_864
-; CHECK-RV32-NEXT: j .LBB61_363
+; CHECK-RV32-NEXT: j .LBB61_362
; CHECK-RV32-NEXT: .LBB61_864: # %cond.load1361
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8474,9 +7814,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 9
-; CHECK-RV32-NEXT: bltz a2, .LBB61_865
-; CHECK-RV32-NEXT: j .LBB61_364
+; CHECK-RV32-NEXT: j .LBB61_363
; CHECK-RV32-NEXT: .LBB61_865: # %cond.load1365
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8487,9 +7825,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 8
-; CHECK-RV32-NEXT: bltz a2, .LBB61_866
-; CHECK-RV32-NEXT: j .LBB61_365
+; CHECK-RV32-NEXT: j .LBB61_364
; CHECK-RV32-NEXT: .LBB61_866: # %cond.load1369
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8500,9 +7836,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 7
-; CHECK-RV32-NEXT: bltz a2, .LBB61_867
-; CHECK-RV32-NEXT: j .LBB61_366
+; CHECK-RV32-NEXT: j .LBB61_365
; CHECK-RV32-NEXT: .LBB61_867: # %cond.load1373
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8513,9 +7847,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 6
-; CHECK-RV32-NEXT: bltz a2, .LBB61_868
-; CHECK-RV32-NEXT: j .LBB61_367
+; CHECK-RV32-NEXT: j .LBB61_366
; CHECK-RV32-NEXT: .LBB61_868: # %cond.load1377
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8526,9 +7858,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 5
-; CHECK-RV32-NEXT: bltz a2, .LBB61_869
-; CHECK-RV32-NEXT: j .LBB61_368
+; CHECK-RV32-NEXT: j .LBB61_367
; CHECK-RV32-NEXT: .LBB61_869: # %cond.load1381
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8539,9 +7869,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 4
-; CHECK-RV32-NEXT: bltz a2, .LBB61_870
-; CHECK-RV32-NEXT: j .LBB61_369
+; CHECK-RV32-NEXT: j .LBB61_368
; CHECK-RV32-NEXT: .LBB61_870: # %cond.load1385
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8552,9 +7880,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 3
-; CHECK-RV32-NEXT: bltz a2, .LBB61_871
-; CHECK-RV32-NEXT: j .LBB61_370
+; CHECK-RV32-NEXT: j .LBB61_369
; CHECK-RV32-NEXT: .LBB61_871: # %cond.load1389
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8565,11 +7891,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 2
-; CHECK-RV32-NEXT: bgez a2, .LBB61_1035
-; CHECK-RV32-NEXT: j .LBB61_371
-; CHECK-RV32-NEXT: .LBB61_1035: # %cond.load1389
-; CHECK-RV32-NEXT: j .LBB61_372
+; CHECK-RV32-NEXT: j .LBB61_370
; CHECK-RV32-NEXT: .LBB61_872: # %cond.load1401
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vmv.s.x v16, a3
@@ -8578,9 +7900,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 1
-; CHECK-RV32-NEXT: bnez a3, .LBB61_873
-; CHECK-RV32-NEXT: j .LBB61_376
+; CHECK-RV32-NEXT: j .LBB61_375
; CHECK-RV32-NEXT: .LBB61_873: # %cond.load1405
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8591,9 +7911,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 2
-; CHECK-RV32-NEXT: bnez a3, .LBB61_874
-; CHECK-RV32-NEXT: j .LBB61_377
+; CHECK-RV32-NEXT: j .LBB61_376
; CHECK-RV32-NEXT: .LBB61_874: # %cond.load1409
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8604,9 +7922,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 4
-; CHECK-RV32-NEXT: bnez a3, .LBB61_875
-; CHECK-RV32-NEXT: j .LBB61_378
+; CHECK-RV32-NEXT: j .LBB61_377
; CHECK-RV32-NEXT: .LBB61_875: # %cond.load1413
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8617,9 +7933,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 8
-; CHECK-RV32-NEXT: bnez a3, .LBB61_876
-; CHECK-RV32-NEXT: j .LBB61_379
+; CHECK-RV32-NEXT: j .LBB61_378
; CHECK-RV32-NEXT: .LBB61_876: # %cond.load1417
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8630,9 +7944,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 16
-; CHECK-RV32-NEXT: bnez a3, .LBB61_877
-; CHECK-RV32-NEXT: j .LBB61_380
+; CHECK-RV32-NEXT: j .LBB61_379
; CHECK-RV32-NEXT: .LBB61_877: # %cond.load1421
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8643,9 +7955,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 32
-; CHECK-RV32-NEXT: bnez a3, .LBB61_878
-; CHECK-RV32-NEXT: j .LBB61_381
+; CHECK-RV32-NEXT: j .LBB61_380
; CHECK-RV32-NEXT: .LBB61_878: # %cond.load1425
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8656,9 +7966,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 64
-; CHECK-RV32-NEXT: bnez a3, .LBB61_879
-; CHECK-RV32-NEXT: j .LBB61_382
+; CHECK-RV32-NEXT: j .LBB61_381
; CHECK-RV32-NEXT: .LBB61_879: # %cond.load1429
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8669,9 +7977,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 128
-; CHECK-RV32-NEXT: bnez a3, .LBB61_880
-; CHECK-RV32-NEXT: j .LBB61_383
+; CHECK-RV32-NEXT: j .LBB61_382
; CHECK-RV32-NEXT: .LBB61_880: # %cond.load1433
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8682,9 +7988,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 256
-; CHECK-RV32-NEXT: bnez a3, .LBB61_881
-; CHECK-RV32-NEXT: j .LBB61_384
+; CHECK-RV32-NEXT: j .LBB61_383
; CHECK-RV32-NEXT: .LBB61_881: # %cond.load1437
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8695,9 +7999,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 512
-; CHECK-RV32-NEXT: bnez a3, .LBB61_882
-; CHECK-RV32-NEXT: j .LBB61_385
+; CHECK-RV32-NEXT: j .LBB61_384
; CHECK-RV32-NEXT: .LBB61_882: # %cond.load1441
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8708,9 +8010,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 1024
-; CHECK-RV32-NEXT: bnez a3, .LBB61_883
-; CHECK-RV32-NEXT: j .LBB61_386
+; CHECK-RV32-NEXT: j .LBB61_385
; CHECK-RV32-NEXT: .LBB61_883: # %cond.load1445
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8721,9 +8021,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 20
-; CHECK-RV32-NEXT: bltz a3, .LBB61_884
-; CHECK-RV32-NEXT: j .LBB61_387
+; CHECK-RV32-NEXT: j .LBB61_386
; CHECK-RV32-NEXT: .LBB61_884: # %cond.load1449
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8734,9 +8032,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 19
-; CHECK-RV32-NEXT: bltz a3, .LBB61_885
-; CHECK-RV32-NEXT: j .LBB61_388
+; CHECK-RV32-NEXT: j .LBB61_387
; CHECK-RV32-NEXT: .LBB61_885: # %cond.load1453
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8747,9 +8043,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 18
-; CHECK-RV32-NEXT: bltz a3, .LBB61_886
-; CHECK-RV32-NEXT: j .LBB61_389
+; CHECK-RV32-NEXT: j .LBB61_388
; CHECK-RV32-NEXT: .LBB61_886: # %cond.load1457
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8760,9 +8054,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 17
-; CHECK-RV32-NEXT: bltz a3, .LBB61_887
-; CHECK-RV32-NEXT: j .LBB61_390
+; CHECK-RV32-NEXT: j .LBB61_389
; CHECK-RV32-NEXT: .LBB61_887: # %cond.load1461
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8773,9 +8065,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 16
-; CHECK-RV32-NEXT: bltz a3, .LBB61_888
-; CHECK-RV32-NEXT: j .LBB61_391
+; CHECK-RV32-NEXT: j .LBB61_390
; CHECK-RV32-NEXT: .LBB61_888: # %cond.load1465
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8786,9 +8076,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 15
-; CHECK-RV32-NEXT: bltz a3, .LBB61_889
-; CHECK-RV32-NEXT: j .LBB61_392
+; CHECK-RV32-NEXT: j .LBB61_391
; CHECK-RV32-NEXT: .LBB61_889: # %cond.load1469
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8799,9 +8087,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 14
-; CHECK-RV32-NEXT: bltz a3, .LBB61_890
-; CHECK-RV32-NEXT: j .LBB61_393
+; CHECK-RV32-NEXT: j .LBB61_392
; CHECK-RV32-NEXT: .LBB61_890: # %cond.load1473
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8812,9 +8098,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 13
-; CHECK-RV32-NEXT: bltz a3, .LBB61_891
-; CHECK-RV32-NEXT: j .LBB61_394
+; CHECK-RV32-NEXT: j .LBB61_393
; CHECK-RV32-NEXT: .LBB61_891: # %cond.load1477
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8825,9 +8109,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 12
-; CHECK-RV32-NEXT: bltz a3, .LBB61_892
-; CHECK-RV32-NEXT: j .LBB61_395
+; CHECK-RV32-NEXT: j .LBB61_394
; CHECK-RV32-NEXT: .LBB61_892: # %cond.load1481
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8838,9 +8120,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 11
-; CHECK-RV32-NEXT: bltz a3, .LBB61_893
-; CHECK-RV32-NEXT: j .LBB61_396
+; CHECK-RV32-NEXT: j .LBB61_395
; CHECK-RV32-NEXT: .LBB61_893: # %cond.load1485
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8851,9 +8131,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 10
-; CHECK-RV32-NEXT: bltz a3, .LBB61_894
-; CHECK-RV32-NEXT: j .LBB61_397
+; CHECK-RV32-NEXT: j .LBB61_396
; CHECK-RV32-NEXT: .LBB61_894: # %cond.load1489
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8864,9 +8142,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 9
-; CHECK-RV32-NEXT: bltz a3, .LBB61_895
-; CHECK-RV32-NEXT: j .LBB61_398
+; CHECK-RV32-NEXT: j .LBB61_397
; CHECK-RV32-NEXT: .LBB61_895: # %cond.load1493
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8877,9 +8153,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 8
-; CHECK-RV32-NEXT: bltz a3, .LBB61_896
-; CHECK-RV32-NEXT: j .LBB61_399
+; CHECK-RV32-NEXT: j .LBB61_398
; CHECK-RV32-NEXT: .LBB61_896: # %cond.load1497
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8890,9 +8164,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 7
-; CHECK-RV32-NEXT: bltz a3, .LBB61_897
-; CHECK-RV32-NEXT: j .LBB61_400
+; CHECK-RV32-NEXT: j .LBB61_399
; CHECK-RV32-NEXT: .LBB61_897: # %cond.load1501
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8903,9 +8175,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 6
-; CHECK-RV32-NEXT: bltz a3, .LBB61_898
-; CHECK-RV32-NEXT: j .LBB61_401
+; CHECK-RV32-NEXT: j .LBB61_400
; CHECK-RV32-NEXT: .LBB61_898: # %cond.load1505
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8916,9 +8186,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 5
-; CHECK-RV32-NEXT: bltz a3, .LBB61_899
-; CHECK-RV32-NEXT: j .LBB61_402
+; CHECK-RV32-NEXT: j .LBB61_401
; CHECK-RV32-NEXT: .LBB61_899: # %cond.load1509
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8929,9 +8197,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 4
-; CHECK-RV32-NEXT: bltz a3, .LBB61_900
-; CHECK-RV32-NEXT: j .LBB61_403
+; CHECK-RV32-NEXT: j .LBB61_402
; CHECK-RV32-NEXT: .LBB61_900: # %cond.load1513
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8942,9 +8208,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 3
-; CHECK-RV32-NEXT: bltz a3, .LBB61_901
-; CHECK-RV32-NEXT: j .LBB61_404
+; CHECK-RV32-NEXT: j .LBB61_403
; CHECK-RV32-NEXT: .LBB61_901: # %cond.load1517
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8955,11 +8219,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 2
-; CHECK-RV32-NEXT: bgez a3, .LBB61_1036
-; CHECK-RV32-NEXT: j .LBB61_405
-; CHECK-RV32-NEXT: .LBB61_1036: # %cond.load1517
-; CHECK-RV32-NEXT: j .LBB61_406
+; CHECK-RV32-NEXT: j .LBB61_404
; CHECK-RV32-NEXT: .LBB61_902: # %cond.load1529
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vmv.s.x v16, a2
@@ -8968,9 +8228,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 1
-; CHECK-RV32-NEXT: bnez a2, .LBB61_903
-; CHECK-RV32-NEXT: j .LBB61_410
+; CHECK-RV32-NEXT: j .LBB61_409
; CHECK-RV32-NEXT: .LBB61_903: # %cond.load1533
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8981,9 +8239,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 2
-; CHECK-RV32-NEXT: bnez a2, .LBB61_904
-; CHECK-RV32-NEXT: j .LBB61_411
+; CHECK-RV32-NEXT: j .LBB61_410
; CHECK-RV32-NEXT: .LBB61_904: # %cond.load1537
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8994,9 +8250,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 4
-; CHECK-RV32-NEXT: bnez a2, .LBB61_905
-; CHECK-RV32-NEXT: j .LBB61_412
+; CHECK-RV32-NEXT: j .LBB61_411
; CHECK-RV32-NEXT: .LBB61_905: # %cond.load1541
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9007,9 +8261,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 8
-; CHECK-RV32-NEXT: bnez a2, .LBB61_906
-; CHECK-RV32-NEXT: j .LBB61_413
+; CHECK-RV32-NEXT: j .LBB61_412
; CHECK-RV32-NEXT: .LBB61_906: # %cond.load1545
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9020,9 +8272,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 16
-; CHECK-RV32-NEXT: bnez a2, .LBB61_907
-; CHECK-RV32-NEXT: j .LBB61_414
+; CHECK-RV32-NEXT: j .LBB61_413
; CHECK-RV32-NEXT: .LBB61_907: # %cond.load1549
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9033,9 +8283,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 32
-; CHECK-RV32-NEXT: bnez a2, .LBB61_908
-; CHECK-RV32-NEXT: j .LBB61_415
+; CHECK-RV32-NEXT: j .LBB61_414
; CHECK-RV32-NEXT: .LBB61_908: # %cond.load1553
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9046,9 +8294,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 64
-; CHECK-RV32-NEXT: bnez a2, .LBB61_909
-; CHECK-RV32-NEXT: j .LBB61_416
+; CHECK-RV32-NEXT: j .LBB61_415
; CHECK-RV32-NEXT: .LBB61_909: # %cond.load1557
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9059,9 +8305,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 128
-; CHECK-RV32-NEXT: bnez a2, .LBB61_910
-; CHECK-RV32-NEXT: j .LBB61_417
+; CHECK-RV32-NEXT: j .LBB61_416
; CHECK-RV32-NEXT: .LBB61_910: # %cond.load1561
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9072,9 +8316,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 256
-; CHECK-RV32-NEXT: bnez a2, .LBB61_911
-; CHECK-RV32-NEXT: j .LBB61_418
+; CHECK-RV32-NEXT: j .LBB61_417
; CHECK-RV32-NEXT: .LBB61_911: # %cond.load1565
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9085,9 +8327,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 512
-; CHECK-RV32-NEXT: bnez a2, .LBB61_912
-; CHECK-RV32-NEXT: j .LBB61_419
+; CHECK-RV32-NEXT: j .LBB61_418
; CHECK-RV32-NEXT: .LBB61_912: # %cond.load1569
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9098,9 +8338,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 1024
-; CHECK-RV32-NEXT: bnez a2, .LBB61_913
-; CHECK-RV32-NEXT: j .LBB61_420
+; CHECK-RV32-NEXT: j .LBB61_419
; CHECK-RV32-NEXT: .LBB61_913: # %cond.load1573
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9111,9 +8349,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 20
-; CHECK-RV32-NEXT: bltz a2, .LBB61_914
-; CHECK-RV32-NEXT: j .LBB61_421
+; CHECK-RV32-NEXT: j .LBB61_420
; CHECK-RV32-NEXT: .LBB61_914: # %cond.load1577
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9124,9 +8360,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 19
-; CHECK-RV32-NEXT: bltz a2, .LBB61_915
-; CHECK-RV32-NEXT: j .LBB61_422
+; CHECK-RV32-NEXT: j .LBB61_421
; CHECK-RV32-NEXT: .LBB61_915: # %cond.load1581
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9137,9 +8371,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 18
-; CHECK-RV32-NEXT: bltz a2, .LBB61_916
-; CHECK-RV32-NEXT: j .LBB61_423
+; CHECK-RV32-NEXT: j .LBB61_422
; CHECK-RV32-NEXT: .LBB61_916: # %cond.load1585
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9150,9 +8382,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 17
-; CHECK-RV32-NEXT: bltz a2, .LBB61_917
-; CHECK-RV32-NEXT: j .LBB61_424
+; CHECK-RV32-NEXT: j .LBB61_423
; CHECK-RV32-NEXT: .LBB61_917: # %cond.load1589
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9163,9 +8393,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 16
-; CHECK-RV32-NEXT: bltz a2, .LBB61_918
-; CHECK-RV32-NEXT: j .LBB61_425
+; CHECK-RV32-NEXT: j .LBB61_424
; CHECK-RV32-NEXT: .LBB61_918: # %cond.load1593
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9176,9 +8404,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 15
-; CHECK-RV32-NEXT: bltz a2, .LBB61_919
-; CHECK-RV32-NEXT: j .LBB61_426
+; CHECK-RV32-NEXT: j .LBB61_425
; CHECK-RV32-NEXT: .LBB61_919: # %cond.load1597
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9189,9 +8415,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 14
-; CHECK-RV32-NEXT: bltz a2, .LBB61_920
-; CHECK-RV32-NEXT: j .LBB61_427
+; CHECK-RV32-NEXT: j .LBB61_426
; CHECK-RV32-NEXT: .LBB61_920: # %cond.load1601
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9202,9 +8426,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 13
-; CHECK-RV32-NEXT: bltz a2, .LBB61_921
-; CHECK-RV32-NEXT: j .LBB61_428
+; CHECK-RV32-NEXT: j .LBB61_427
; CHECK-RV32-NEXT: .LBB61_921: # %cond.load1605
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9215,9 +8437,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 12
-; CHECK-RV32-NEXT: bltz a2, .LBB61_922
-; CHECK-RV32-NEXT: j .LBB61_429
+; CHECK-RV32-NEXT: j .LBB61_428
; CHECK-RV32-NEXT: .LBB61_922: # %cond.load1609
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9228,9 +8448,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 11
-; CHECK-RV32-NEXT: bltz a2, .LBB61_923
-; CHECK-RV32-NEXT: j .LBB61_430
+; CHECK-RV32-NEXT: j .LBB61_429
; CHECK-RV32-NEXT: .LBB61_923: # %cond.load1613
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9241,9 +8459,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 10
-; CHECK-RV32-NEXT: bltz a2, .LBB61_924
-; CHECK-RV32-NEXT: j .LBB61_431
+; CHECK-RV32-NEXT: j .LBB61_430
; CHECK-RV32-NEXT: .LBB61_924: # %cond.load1617
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9254,9 +8470,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 9
-; CHECK-RV32-NEXT: bltz a2, .LBB61_925
-; CHECK-RV32-NEXT: j .LBB61_432
+; CHECK-RV32-NEXT: j .LBB61_431
; CHECK-RV32-NEXT: .LBB61_925: # %cond.load1621
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9267,9 +8481,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 8
-; CHECK-RV32-NEXT: bltz a2, .LBB61_926
-; CHECK-RV32-NEXT: j .LBB61_433
+; CHECK-RV32-NEXT: j .LBB61_432
; CHECK-RV32-NEXT: .LBB61_926: # %cond.load1625
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9280,9 +8492,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 7
-; CHECK-RV32-NEXT: bltz a2, .LBB61_927
-; CHECK-RV32-NEXT: j .LBB61_434
+; CHECK-RV32-NEXT: j .LBB61_433
; CHECK-RV32-NEXT: .LBB61_927: # %cond.load1629
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9293,9 +8503,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 6
-; CHECK-RV32-NEXT: bltz a2, .LBB61_928
-; CHECK-RV32-NEXT: j .LBB61_435
+; CHECK-RV32-NEXT: j .LBB61_434
; CHECK-RV32-NEXT: .LBB61_928: # %cond.load1633
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9306,9 +8514,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 5
-; CHECK-RV32-NEXT: bltz a2, .LBB61_929
-; CHECK-RV32-NEXT: j .LBB61_436
+; CHECK-RV32-NEXT: j .LBB61_435
; CHECK-RV32-NEXT: .LBB61_929: # %cond.load1637
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9319,9 +8525,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 4
-; CHECK-RV32-NEXT: bltz a2, .LBB61_930
-; CHECK-RV32-NEXT: j .LBB61_437
+; CHECK-RV32-NEXT: j .LBB61_436
; CHECK-RV32-NEXT: .LBB61_930: # %cond.load1641
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9332,9 +8536,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 3
-; CHECK-RV32-NEXT: bltz a2, .LBB61_931
-; CHECK-RV32-NEXT: j .LBB61_438
+; CHECK-RV32-NEXT: j .LBB61_437
; CHECK-RV32-NEXT: .LBB61_931: # %cond.load1645
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9345,11 +8547,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 2
-; CHECK-RV32-NEXT: bgez a2, .LBB61_1037
-; CHECK-RV32-NEXT: j .LBB61_439
-; CHECK-RV32-NEXT: .LBB61_1037: # %cond.load1645
-; CHECK-RV32-NEXT: j .LBB61_440
+; CHECK-RV32-NEXT: j .LBB61_438
; CHECK-RV32-NEXT: .LBB61_932: # %cond.load1657
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vmv.s.x v16, a3
@@ -9358,9 +8556,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 1
-; CHECK-RV32-NEXT: bnez a3, .LBB61_933
-; CHECK-RV32-NEXT: j .LBB61_444
+; CHECK-RV32-NEXT: j .LBB61_443
; CHECK-RV32-NEXT: .LBB61_933: # %cond.load1661
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9371,9 +8567,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 2
-; CHECK-RV32-NEXT: bnez a3, .LBB61_934
-; CHECK-RV32-NEXT: j .LBB61_445
+; CHECK-RV32-NEXT: j .LBB61_444
; CHECK-RV32-NEXT: .LBB61_934: # %cond.load1665
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9384,9 +8578,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 4
-; CHECK-RV32-NEXT: bnez a3, .LBB61_935
-; CHECK-RV32-NEXT: j .LBB61_446
+; CHECK-RV32-NEXT: j .LBB61_445
; CHECK-RV32-NEXT: .LBB61_935: # %cond.load1669
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9397,9 +8589,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 8
-; CHECK-RV32-NEXT: bnez a3, .LBB61_936
-; CHECK-RV32-NEXT: j .LBB61_447
+; CHECK-RV32-NEXT: j .LBB61_446
; CHECK-RV32-NEXT: .LBB61_936: # %cond.load1673
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9410,9 +8600,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 16
-; CHECK-RV32-NEXT: bnez a3, .LBB61_937
-; CHECK-RV32-NEXT: j .LBB61_448
+; CHECK-RV32-NEXT: j .LBB61_447
; CHECK-RV32-NEXT: .LBB61_937: # %cond.load1677
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9423,9 +8611,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 32
-; CHECK-RV32-NEXT: bnez a3, .LBB61_938
-; CHECK-RV32-NEXT: j .LBB61_449
+; CHECK-RV32-NEXT: j .LBB61_448
; CHECK-RV32-NEXT: .LBB61_938: # %cond.load1681
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9436,9 +8622,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 64
-; CHECK-RV32-NEXT: bnez a3, .LBB61_939
-; CHECK-RV32-NEXT: j .LBB61_450
+; CHECK-RV32-NEXT: j .LBB61_449
; CHECK-RV32-NEXT: .LBB61_939: # %cond.load1685
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9449,9 +8633,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 128
-; CHECK-RV32-NEXT: bnez a3, .LBB61_940
-; CHECK-RV32-NEXT: j .LBB61_451
+; CHECK-RV32-NEXT: j .LBB61_450
; CHECK-RV32-NEXT: .LBB61_940: # %cond.load1689
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9462,9 +8644,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 256
-; CHECK-RV32-NEXT: bnez a3, .LBB61_941
-; CHECK-RV32-NEXT: j .LBB61_452
+; CHECK-RV32-NEXT: j .LBB61_451
; CHECK-RV32-NEXT: .LBB61_941: # %cond.load1693
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9475,9 +8655,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 512
-; CHECK-RV32-NEXT: bnez a3, .LBB61_942
-; CHECK-RV32-NEXT: j .LBB61_453
+; CHECK-RV32-NEXT: j .LBB61_452
; CHECK-RV32-NEXT: .LBB61_942: # %cond.load1697
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9488,9 +8666,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a3, a2, 1024
-; CHECK-RV32-NEXT: bnez a3, .LBB61_943
-; CHECK-RV32-NEXT: j .LBB61_454
+; CHECK-RV32-NEXT: j .LBB61_453
; CHECK-RV32-NEXT: .LBB61_943: # %cond.load1701
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9501,9 +8677,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 20
-; CHECK-RV32-NEXT: bltz a3, .LBB61_944
-; CHECK-RV32-NEXT: j .LBB61_455
+; CHECK-RV32-NEXT: j .LBB61_454
; CHECK-RV32-NEXT: .LBB61_944: # %cond.load1705
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9514,9 +8688,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 19
-; CHECK-RV32-NEXT: bltz a3, .LBB61_945
-; CHECK-RV32-NEXT: j .LBB61_456
+; CHECK-RV32-NEXT: j .LBB61_455
; CHECK-RV32-NEXT: .LBB61_945: # %cond.load1709
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9527,9 +8699,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 18
-; CHECK-RV32-NEXT: bltz a3, .LBB61_946
-; CHECK-RV32-NEXT: j .LBB61_457
+; CHECK-RV32-NEXT: j .LBB61_456
; CHECK-RV32-NEXT: .LBB61_946: # %cond.load1713
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9540,9 +8710,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 17
-; CHECK-RV32-NEXT: bltz a3, .LBB61_947
-; CHECK-RV32-NEXT: j .LBB61_458
+; CHECK-RV32-NEXT: j .LBB61_457
; CHECK-RV32-NEXT: .LBB61_947: # %cond.load1717
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9553,9 +8721,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 16
-; CHECK-RV32-NEXT: bltz a3, .LBB61_948
-; CHECK-RV32-NEXT: j .LBB61_459
+; CHECK-RV32-NEXT: j .LBB61_458
; CHECK-RV32-NEXT: .LBB61_948: # %cond.load1721
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9566,9 +8732,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 15
-; CHECK-RV32-NEXT: bltz a3, .LBB61_949
-; CHECK-RV32-NEXT: j .LBB61_460
+; CHECK-RV32-NEXT: j .LBB61_459
; CHECK-RV32-NEXT: .LBB61_949: # %cond.load1725
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9579,9 +8743,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 14
-; CHECK-RV32-NEXT: bltz a3, .LBB61_950
-; CHECK-RV32-NEXT: j .LBB61_461
+; CHECK-RV32-NEXT: j .LBB61_460
; CHECK-RV32-NEXT: .LBB61_950: # %cond.load1729
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9592,9 +8754,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 13
-; CHECK-RV32-NEXT: bltz a3, .LBB61_951
-; CHECK-RV32-NEXT: j .LBB61_462
+; CHECK-RV32-NEXT: j .LBB61_461
; CHECK-RV32-NEXT: .LBB61_951: # %cond.load1733
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9605,9 +8765,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 12
-; CHECK-RV32-NEXT: bltz a3, .LBB61_952
-; CHECK-RV32-NEXT: j .LBB61_463
+; CHECK-RV32-NEXT: j .LBB61_462
; CHECK-RV32-NEXT: .LBB61_952: # %cond.load1737
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9618,9 +8776,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 11
-; CHECK-RV32-NEXT: bltz a3, .LBB61_953
-; CHECK-RV32-NEXT: j .LBB61_464
+; CHECK-RV32-NEXT: j .LBB61_463
; CHECK-RV32-NEXT: .LBB61_953: # %cond.load1741
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9631,9 +8787,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 10
-; CHECK-RV32-NEXT: bltz a3, .LBB61_954
-; CHECK-RV32-NEXT: j .LBB61_465
+; CHECK-RV32-NEXT: j .LBB61_464
; CHECK-RV32-NEXT: .LBB61_954: # %cond.load1745
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9644,9 +8798,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 9
-; CHECK-RV32-NEXT: bltz a3, .LBB61_955
-; CHECK-RV32-NEXT: j .LBB61_466
+; CHECK-RV32-NEXT: j .LBB61_465
; CHECK-RV32-NEXT: .LBB61_955: # %cond.load1749
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9657,9 +8809,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 8
-; CHECK-RV32-NEXT: bltz a3, .LBB61_956
-; CHECK-RV32-NEXT: j .LBB61_467
+; CHECK-RV32-NEXT: j .LBB61_466
; CHECK-RV32-NEXT: .LBB61_956: # %cond.load1753
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9670,9 +8820,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 7
-; CHECK-RV32-NEXT: bltz a3, .LBB61_957
-; CHECK-RV32-NEXT: j .LBB61_468
+; CHECK-RV32-NEXT: j .LBB61_467
; CHECK-RV32-NEXT: .LBB61_957: # %cond.load1757
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9683,9 +8831,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 6
-; CHECK-RV32-NEXT: bltz a3, .LBB61_958
-; CHECK-RV32-NEXT: j .LBB61_469
+; CHECK-RV32-NEXT: j .LBB61_468
; CHECK-RV32-NEXT: .LBB61_958: # %cond.load1761
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9696,9 +8842,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 5
-; CHECK-RV32-NEXT: bltz a3, .LBB61_959
-; CHECK-RV32-NEXT: j .LBB61_470
+; CHECK-RV32-NEXT: j .LBB61_469
; CHECK-RV32-NEXT: .LBB61_959: # %cond.load1765
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9709,9 +8853,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 4
-; CHECK-RV32-NEXT: bltz a3, .LBB61_960
-; CHECK-RV32-NEXT: j .LBB61_471
+; CHECK-RV32-NEXT: j .LBB61_470
; CHECK-RV32-NEXT: .LBB61_960: # %cond.load1769
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9722,9 +8864,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 3
-; CHECK-RV32-NEXT: bltz a3, .LBB61_961
-; CHECK-RV32-NEXT: j .LBB61_472
+; CHECK-RV32-NEXT: j .LBB61_471
; CHECK-RV32-NEXT: .LBB61_961: # %cond.load1773
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9735,11 +8875,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a3, a2, 2
-; CHECK-RV32-NEXT: bgez a3, .LBB61_1038
-; CHECK-RV32-NEXT: j .LBB61_473
-; CHECK-RV32-NEXT: .LBB61_1038: # %cond.load1773
-; CHECK-RV32-NEXT: j .LBB61_474
+; CHECK-RV32-NEXT: j .LBB61_472
; CHECK-RV32-NEXT: .LBB61_962: # %cond.load1785
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vmv.s.x v16, a2
@@ -9748,9 +8884,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 1
-; CHECK-RV32-NEXT: bnez a2, .LBB61_963
-; CHECK-RV32-NEXT: j .LBB61_478
+; CHECK-RV32-NEXT: j .LBB61_477
; CHECK-RV32-NEXT: .LBB61_963: # %cond.load1789
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9761,9 +8895,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 2
-; CHECK-RV32-NEXT: bnez a2, .LBB61_964
-; CHECK-RV32-NEXT: j .LBB61_479
+; CHECK-RV32-NEXT: j .LBB61_478
; CHECK-RV32-NEXT: .LBB61_964: # %cond.load1793
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9774,9 +8906,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 4
-; CHECK-RV32-NEXT: bnez a2, .LBB61_965
-; CHECK-RV32-NEXT: j .LBB61_480
+; CHECK-RV32-NEXT: j .LBB61_479
; CHECK-RV32-NEXT: .LBB61_965: # %cond.load1797
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9787,9 +8917,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 8
-; CHECK-RV32-NEXT: bnez a2, .LBB61_966
-; CHECK-RV32-NEXT: j .LBB61_481
+; CHECK-RV32-NEXT: j .LBB61_480
; CHECK-RV32-NEXT: .LBB61_966: # %cond.load1801
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9800,9 +8928,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 16
-; CHECK-RV32-NEXT: bnez a2, .LBB61_967
-; CHECK-RV32-NEXT: j .LBB61_482
+; CHECK-RV32-NEXT: j .LBB61_481
; CHECK-RV32-NEXT: .LBB61_967: # %cond.load1805
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9813,9 +8939,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 32
-; CHECK-RV32-NEXT: bnez a2, .LBB61_968
-; CHECK-RV32-NEXT: j .LBB61_483
+; CHECK-RV32-NEXT: j .LBB61_482
; CHECK-RV32-NEXT: .LBB61_968: # %cond.load1809
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9826,9 +8950,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 64
-; CHECK-RV32-NEXT: bnez a2, .LBB61_969
-; CHECK-RV32-NEXT: j .LBB61_484
+; CHECK-RV32-NEXT: j .LBB61_483
; CHECK-RV32-NEXT: .LBB61_969: # %cond.load1813
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9839,9 +8961,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 128
-; CHECK-RV32-NEXT: bnez a2, .LBB61_970
-; CHECK-RV32-NEXT: j .LBB61_485
+; CHECK-RV32-NEXT: j .LBB61_484
; CHECK-RV32-NEXT: .LBB61_970: # %cond.load1817
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9852,9 +8972,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 256
-; CHECK-RV32-NEXT: bnez a2, .LBB61_971
-; CHECK-RV32-NEXT: j .LBB61_486
+; CHECK-RV32-NEXT: j .LBB61_485
; CHECK-RV32-NEXT: .LBB61_971: # %cond.load1821
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9865,9 +8983,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 512
-; CHECK-RV32-NEXT: bnez a2, .LBB61_972
-; CHECK-RV32-NEXT: j .LBB61_487
+; CHECK-RV32-NEXT: j .LBB61_486
; CHECK-RV32-NEXT: .LBB61_972: # %cond.load1825
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9878,9 +8994,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a3, 1024
-; CHECK-RV32-NEXT: bnez a2, .LBB61_973
-; CHECK-RV32-NEXT: j .LBB61_488
+; CHECK-RV32-NEXT: j .LBB61_487
; CHECK-RV32-NEXT: .LBB61_973: # %cond.load1829
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9891,9 +9005,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 20
-; CHECK-RV32-NEXT: bltz a2, .LBB61_974
-; CHECK-RV32-NEXT: j .LBB61_489
+; CHECK-RV32-NEXT: j .LBB61_488
; CHECK-RV32-NEXT: .LBB61_974: # %cond.load1833
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9904,9 +9016,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 19
-; CHECK-RV32-NEXT: bltz a2, .LBB61_975
-; CHECK-RV32-NEXT: j .LBB61_490
+; CHECK-RV32-NEXT: j .LBB61_489
; CHECK-RV32-NEXT: .LBB61_975: # %cond.load1837
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9917,9 +9027,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 18
-; CHECK-RV32-NEXT: bltz a2, .LBB61_976
-; CHECK-RV32-NEXT: j .LBB61_491
+; CHECK-RV32-NEXT: j .LBB61_490
; CHECK-RV32-NEXT: .LBB61_976: # %cond.load1841
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9930,9 +9038,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 17
-; CHECK-RV32-NEXT: bltz a2, .LBB61_977
-; CHECK-RV32-NEXT: j .LBB61_492
+; CHECK-RV32-NEXT: j .LBB61_491
; CHECK-RV32-NEXT: .LBB61_977: # %cond.load1845
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9943,9 +9049,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 16
-; CHECK-RV32-NEXT: bltz a2, .LBB61_978
-; CHECK-RV32-NEXT: j .LBB61_493
+; CHECK-RV32-NEXT: j .LBB61_492
; CHECK-RV32-NEXT: .LBB61_978: # %cond.load1849
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9956,9 +9060,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 15
-; CHECK-RV32-NEXT: bltz a2, .LBB61_979
-; CHECK-RV32-NEXT: j .LBB61_494
+; CHECK-RV32-NEXT: j .LBB61_493
; CHECK-RV32-NEXT: .LBB61_979: # %cond.load1853
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9969,9 +9071,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 14
-; CHECK-RV32-NEXT: bltz a2, .LBB61_980
-; CHECK-RV32-NEXT: j .LBB61_495
+; CHECK-RV32-NEXT: j .LBB61_494
; CHECK-RV32-NEXT: .LBB61_980: # %cond.load1857
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9982,9 +9082,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 13
-; CHECK-RV32-NEXT: bltz a2, .LBB61_981
-; CHECK-RV32-NEXT: j .LBB61_496
+; CHECK-RV32-NEXT: j .LBB61_495
; CHECK-RV32-NEXT: .LBB61_981: # %cond.load1861
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9995,9 +9093,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 12
-; CHECK-RV32-NEXT: bltz a2, .LBB61_982
-; CHECK-RV32-NEXT: j .LBB61_497
+; CHECK-RV32-NEXT: j .LBB61_496
; CHECK-RV32-NEXT: .LBB61_982: # %cond.load1865
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -10008,9 +9104,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 11
-; CHECK-RV32-NEXT: bltz a2, .LBB61_983
-; CHECK-RV32-NEXT: j .LBB61_498
+; CHECK-RV32-NEXT: j .LBB61_497
; CHECK-RV32-NEXT: .LBB61_983: # %cond.load1869
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -10021,9 +9115,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 10
-; CHECK-RV32-NEXT: bltz a2, .LBB61_984
-; CHECK-RV32-NEXT: j .LBB61_499
+; CHECK-RV32-NEXT: j .LBB61_498
; CHECK-RV32-NEXT: .LBB61_984: # %cond.load1873
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -10034,9 +9126,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 9
-; CHECK-RV32-NEXT: bltz a2, .LBB61_985
-; CHECK-RV32-NEXT: j .LBB61_500
+; CHECK-RV32-NEXT: j .LBB61_499
; CHECK-RV32-NEXT: .LBB61_985: # %cond.load1877
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -10047,9 +9137,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 8
-; CHECK-RV32-NEXT: bltz a2, .LBB61_986
-; CHECK-RV32-NEXT: j .LBB61_501
+; CHECK-RV32-NEXT: j .LBB61_500
; CHECK-RV32-NEXT: .LBB61_986: # %cond.load1881
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -10060,9 +9148,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 7
-; CHECK-RV32-NEXT: bltz a2, .LBB61_987
-; CHECK-RV32-NEXT: j .LBB61_502
+; CHECK-RV32-NEXT: j .LBB61_501
; CHECK-RV32-NEXT: .LBB61_987: # %cond.load1885
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -10073,9 +9159,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 6
-; CHECK-RV32-NEXT: bltz a2, .LBB61_988
-; CHECK-RV32-NEXT: j .LBB61_503
+; CHECK-RV32-NEXT: j .LBB61_502
; CHECK-RV32-NEXT: .LBB61_988: # %cond.load1889
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -10086,9 +9170,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 5
-; CHECK-RV32-NEXT: bltz a2, .LBB61_989
-; CHECK-RV32-NEXT: j .LBB61_504
+; CHECK-RV32-NEXT: j .LBB61_503
; CHECK-RV32-NEXT: .LBB61_989: # %cond.load1893
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -10099,9 +9181,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 4
-; CHECK-RV32-NEXT: bltz a2, .LBB61_990
-; CHECK-RV32-NEXT: j .LBB61_505
+; CHECK-RV32-NEXT: j .LBB61_504
; CHECK-RV32-NEXT: .LBB61_990: # %cond.load1897
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -10112,9 +9192,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 3
-; CHECK-RV32-NEXT: bltz a2, .LBB61_991
-; CHECK-RV32-NEXT: j .LBB61_506
+; CHECK-RV32-NEXT: j .LBB61_505
; CHECK-RV32-NEXT: .LBB61_991: # %cond.load1901
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -10125,11 +9203,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a3, 2
-; CHECK-RV32-NEXT: bgez a2, .LBB61_1039
-; CHECK-RV32-NEXT: j .LBB61_507
-; CHECK-RV32-NEXT: .LBB61_1039: # %cond.load1901
-; CHECK-RV32-NEXT: j .LBB61_508
+; CHECK-RV32-NEXT: j .LBB61_506
; CHECK-RV32-NEXT: .LBB61_992: # %cond.load1913
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vmv.s.x v16, a2
@@ -10138,9 +9212,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a1, 1
-; CHECK-RV32-NEXT: bnez a2, .LBB61_993
-; CHECK-RV32-NEXT: j .LBB61_512
+; CHECK-RV32-NEXT: j .LBB61_511
; CHECK-RV32-NEXT: .LBB61_993: # %cond.load1917
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10151,9 +9223,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a1, 2
-; CHECK-RV32-NEXT: bnez a2, .LBB61_994
-; CHECK-RV32-NEXT: j .LBB61_513
+; CHECK-RV32-NEXT: j .LBB61_512
; CHECK-RV32-NEXT: .LBB61_994: # %cond.load1921
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10164,9 +9234,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a1, 4
-; CHECK-RV32-NEXT: bnez a2, .LBB61_995
-; CHECK-RV32-NEXT: j .LBB61_514
+; CHECK-RV32-NEXT: j .LBB61_513
; CHECK-RV32-NEXT: .LBB61_995: # %cond.load1925
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10177,9 +9245,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a1, 8
-; CHECK-RV32-NEXT: bnez a2, .LBB61_996
-; CHECK-RV32-NEXT: j .LBB61_515
+; CHECK-RV32-NEXT: j .LBB61_514
; CHECK-RV32-NEXT: .LBB61_996: # %cond.load1929
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10190,9 +9256,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a1, 16
-; CHECK-RV32-NEXT: bnez a2, .LBB61_997
-; CHECK-RV32-NEXT: j .LBB61_516
+; CHECK-RV32-NEXT: j .LBB61_515
; CHECK-RV32-NEXT: .LBB61_997: # %cond.load1933
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10203,9 +9267,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a1, 32
-; CHECK-RV32-NEXT: bnez a2, .LBB61_998
-; CHECK-RV32-NEXT: j .LBB61_517
+; CHECK-RV32-NEXT: j .LBB61_516
; CHECK-RV32-NEXT: .LBB61_998: # %cond.load1937
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10216,9 +9278,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a1, 64
-; CHECK-RV32-NEXT: bnez a2, .LBB61_999
-; CHECK-RV32-NEXT: j .LBB61_518
+; CHECK-RV32-NEXT: j .LBB61_517
; CHECK-RV32-NEXT: .LBB61_999: # %cond.load1941
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10229,9 +9289,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a1, 128
-; CHECK-RV32-NEXT: bnez a2, .LBB61_1000
-; CHECK-RV32-NEXT: j .LBB61_519
+; CHECK-RV32-NEXT: j .LBB61_518
; CHECK-RV32-NEXT: .LBB61_1000: # %cond.load1945
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10242,9 +9300,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a1, 256
-; CHECK-RV32-NEXT: bnez a2, .LBB61_1001
-; CHECK-RV32-NEXT: j .LBB61_520
+; CHECK-RV32-NEXT: j .LBB61_519
; CHECK-RV32-NEXT: .LBB61_1001: # %cond.load1949
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10255,9 +9311,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a1, 512
-; CHECK-RV32-NEXT: bnez a2, .LBB61_1002
-; CHECK-RV32-NEXT: j .LBB61_521
+; CHECK-RV32-NEXT: j .LBB61_520
; CHECK-RV32-NEXT: .LBB61_1002: # %cond.load1953
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10268,9 +9322,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: andi a2, a1, 1024
-; CHECK-RV32-NEXT: bnez a2, .LBB61_1003
-; CHECK-RV32-NEXT: j .LBB61_522
+; CHECK-RV32-NEXT: j .LBB61_521
; CHECK-RV32-NEXT: .LBB61_1003: # %cond.load1957
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10281,9 +9333,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 20
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1004
-; CHECK-RV32-NEXT: j .LBB61_523
+; CHECK-RV32-NEXT: j .LBB61_522
; CHECK-RV32-NEXT: .LBB61_1004: # %cond.load1961
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10294,9 +9344,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 19
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1005
-; CHECK-RV32-NEXT: j .LBB61_524
+; CHECK-RV32-NEXT: j .LBB61_523
; CHECK-RV32-NEXT: .LBB61_1005: # %cond.load1965
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10307,9 +9355,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 18
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1006
-; CHECK-RV32-NEXT: j .LBB61_525
+; CHECK-RV32-NEXT: j .LBB61_524
; CHECK-RV32-NEXT: .LBB61_1006: # %cond.load1969
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10320,9 +9366,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 17
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1007
-; CHECK-RV32-NEXT: j .LBB61_526
+; CHECK-RV32-NEXT: j .LBB61_525
; CHECK-RV32-NEXT: .LBB61_1007: # %cond.load1973
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10333,9 +9377,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 16
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1008
-; CHECK-RV32-NEXT: j .LBB61_527
+; CHECK-RV32-NEXT: j .LBB61_526
; CHECK-RV32-NEXT: .LBB61_1008: # %cond.load1977
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10346,9 +9388,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 15
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1009
-; CHECK-RV32-NEXT: j .LBB61_528
+; CHECK-RV32-NEXT: j .LBB61_527
; CHECK-RV32-NEXT: .LBB61_1009: # %cond.load1981
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10359,9 +9399,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 14
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1010
-; CHECK-RV32-NEXT: j .LBB61_529
+; CHECK-RV32-NEXT: j .LBB61_528
; CHECK-RV32-NEXT: .LBB61_1010: # %cond.load1985
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10372,9 +9410,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 13
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1011
-; CHECK-RV32-NEXT: j .LBB61_530
+; CHECK-RV32-NEXT: j .LBB61_529
; CHECK-RV32-NEXT: .LBB61_1011: # %cond.load1989
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10385,9 +9421,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 12
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1012
-; CHECK-RV32-NEXT: j .LBB61_531
+; CHECK-RV32-NEXT: j .LBB61_530
; CHECK-RV32-NEXT: .LBB61_1012: # %cond.load1993
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10398,9 +9432,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 11
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1013
-; CHECK-RV32-NEXT: j .LBB61_532
+; CHECK-RV32-NEXT: j .LBB61_531
; CHECK-RV32-NEXT: .LBB61_1013: # %cond.load1997
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10411,9 +9443,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 10
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1014
-; CHECK-RV32-NEXT: j .LBB61_533
+; CHECK-RV32-NEXT: j .LBB61_532
; CHECK-RV32-NEXT: .LBB61_1014: # %cond.load2001
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10424,9 +9454,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 9
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1015
-; CHECK-RV32-NEXT: j .LBB61_534
+; CHECK-RV32-NEXT: j .LBB61_533
; CHECK-RV32-NEXT: .LBB61_1015: # %cond.load2005
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10437,9 +9465,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 8
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1016
-; CHECK-RV32-NEXT: j .LBB61_535
+; CHECK-RV32-NEXT: j .LBB61_534
; CHECK-RV32-NEXT: .LBB61_1016: # %cond.load2009
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10450,9 +9476,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 7
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1017
-; CHECK-RV32-NEXT: j .LBB61_536
+; CHECK-RV32-NEXT: j .LBB61_535
; CHECK-RV32-NEXT: .LBB61_1017: # %cond.load2013
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10463,9 +9487,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 6
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1018
-; CHECK-RV32-NEXT: j .LBB61_537
+; CHECK-RV32-NEXT: j .LBB61_536
; CHECK-RV32-NEXT: .LBB61_1018: # %cond.load2017
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10476,9 +9498,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 5
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1019
-; CHECK-RV32-NEXT: j .LBB61_538
+; CHECK-RV32-NEXT: j .LBB61_537
; CHECK-RV32-NEXT: .LBB61_1019: # %cond.load2021
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10489,9 +9509,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 4
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1020
-; CHECK-RV32-NEXT: j .LBB61_539
+; CHECK-RV32-NEXT: j .LBB61_538
; CHECK-RV32-NEXT: .LBB61_1020: # %cond.load2025
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10502,9 +9520,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 3
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1021
-; CHECK-RV32-NEXT: j .LBB61_540
+; CHECK-RV32-NEXT: j .LBB61_539
; CHECK-RV32-NEXT: .LBB61_1021: # %cond.load2029
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10515,9 +9531,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 2
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1022
-; CHECK-RV32-NEXT: j .LBB61_541
+; CHECK-RV32-NEXT: j .LBB61_540
; CHECK-RV32-NEXT: .LBB61_1022: # %cond.load2033
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10528,9 +9542,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: slli a2, a1, 1
-; CHECK-RV32-NEXT: bltz a2, .LBB61_1023
-; CHECK-RV32-NEXT: j .LBB61_542
+; CHECK-RV32-NEXT: j .LBB61_541
; CHECK-RV32-NEXT: .LBB61_1023: # %cond.load2037
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -10802,7 +9814,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_61: # %else238
; CHECK-RV64-NEXT: slli a1, a2, 2
; CHECK-RV64-NEXT: bgez a1, .LBB61_63
-; CHECK-RV64-NEXT: .LBB61_62: # %cond.load241
+; CHECK-RV64-NEXT: # %bb.62: # %cond.load241
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV64-NEXT: vmv8r.v v16, v8
@@ -11082,7 +10094,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_127: # %else494
; CHECK-RV64-NEXT: slli a2, a1, 2
; CHECK-RV64-NEXT: bgez a2, .LBB61_129
-; CHECK-RV64-NEXT: .LBB61_128: # %cond.load497
+; CHECK-RV64-NEXT: # %bb.128: # %cond.load497
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV64-NEXT: vmv8r.v v16, v8
@@ -11362,7 +10374,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_193: # %else750
; CHECK-RV64-NEXT: slli a1, a2, 2
; CHECK-RV64-NEXT: bgez a1, .LBB61_195
-; CHECK-RV64-NEXT: .LBB61_194: # %cond.load753
+; CHECK-RV64-NEXT: # %bb.194: # %cond.load753
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV64-NEXT: vmv8r.v v16, v8
@@ -11642,7 +10654,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_259: # %else1006
; CHECK-RV64-NEXT: slli a2, a1, 2
; CHECK-RV64-NEXT: bgez a2, .LBB61_261
-; CHECK-RV64-NEXT: .LBB61_260: # %cond.load1009
+; CHECK-RV64-NEXT: # %bb.260: # %cond.load1009
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV64-NEXT: vmv8r.v v16, v8
@@ -11922,7 +10934,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_325: # %else1262
; CHECK-RV64-NEXT: slli a1, a2, 2
; CHECK-RV64-NEXT: bgez a1, .LBB61_327
-; CHECK-RV64-NEXT: .LBB61_326: # %cond.load1265
+; CHECK-RV64-NEXT: # %bb.326: # %cond.load1265
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma
@@ -12197,7 +11209,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_391: # %else1518
; CHECK-RV64-NEXT: slli a2, a1, 2
; CHECK-RV64-NEXT: bgez a2, .LBB61_393
-; CHECK-RV64-NEXT: .LBB61_392: # %cond.load1521
+; CHECK-RV64-NEXT: # %bb.392: # %cond.load1521
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma
@@ -12472,7 +11484,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_457: # %else1774
; CHECK-RV64-NEXT: slli a1, a2, 2
; CHECK-RV64-NEXT: bgez a1, .LBB61_459
-; CHECK-RV64-NEXT: .LBB61_458: # %cond.load1777
+; CHECK-RV64-NEXT: # %bb.458: # %cond.load1777
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma
@@ -12765,9 +11777,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 2
-; CHECK-RV64-NEXT: bnez a1, .LBB61_528
-; CHECK-RV64-NEXT: j .LBB61_2
+; CHECK-RV64-NEXT: j .LBB61_1
; CHECK-RV64-NEXT: .LBB61_528: # %cond.load1
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 2, e8, m1, tu, ma
@@ -12777,9 +11787,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 4
-; CHECK-RV64-NEXT: bnez a1, .LBB61_529
-; CHECK-RV64-NEXT: j .LBB61_3
+; CHECK-RV64-NEXT: j .LBB61_2
; CHECK-RV64-NEXT: .LBB61_529: # %cond.load5
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 3, e8, m1, tu, ma
@@ -12789,9 +11797,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 8
-; CHECK-RV64-NEXT: bnez a1, .LBB61_530
-; CHECK-RV64-NEXT: j .LBB61_4
+; CHECK-RV64-NEXT: j .LBB61_3
; CHECK-RV64-NEXT: .LBB61_530: # %cond.load9
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 4, e8, m1, tu, ma
@@ -12801,9 +11807,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 16
-; CHECK-RV64-NEXT: bnez a1, .LBB61_531
-; CHECK-RV64-NEXT: j .LBB61_5
+; CHECK-RV64-NEXT: j .LBB61_4
; CHECK-RV64-NEXT: .LBB61_531: # %cond.load13
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 5, e8, m1, tu, ma
@@ -12813,9 +11817,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 32
-; CHECK-RV64-NEXT: bnez a1, .LBB61_532
-; CHECK-RV64-NEXT: j .LBB61_6
+; CHECK-RV64-NEXT: j .LBB61_5
; CHECK-RV64-NEXT: .LBB61_532: # %cond.load17
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 6, e8, m1, tu, ma
@@ -12825,9 +11827,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 64
-; CHECK-RV64-NEXT: bnez a1, .LBB61_533
-; CHECK-RV64-NEXT: j .LBB61_7
+; CHECK-RV64-NEXT: j .LBB61_6
; CHECK-RV64-NEXT: .LBB61_533: # %cond.load21
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 7, e8, m1, tu, ma
@@ -12837,9 +11837,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 128
-; CHECK-RV64-NEXT: bnez a1, .LBB61_534
-; CHECK-RV64-NEXT: j .LBB61_8
+; CHECK-RV64-NEXT: j .LBB61_7
; CHECK-RV64-NEXT: .LBB61_534: # %cond.load25
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 8, e8, m1, tu, ma
@@ -12849,9 +11847,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 256
-; CHECK-RV64-NEXT: bnez a1, .LBB61_535
-; CHECK-RV64-NEXT: j .LBB61_9
+; CHECK-RV64-NEXT: j .LBB61_8
; CHECK-RV64-NEXT: .LBB61_535: # %cond.load29
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 9, e8, m1, tu, ma
@@ -12861,9 +11857,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 512
-; CHECK-RV64-NEXT: bnez a1, .LBB61_536
-; CHECK-RV64-NEXT: j .LBB61_10
+; CHECK-RV64-NEXT: j .LBB61_9
; CHECK-RV64-NEXT: .LBB61_536: # %cond.load33
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 10, e8, m1, tu, ma
@@ -12873,9 +11867,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 1024
-; CHECK-RV64-NEXT: bnez a1, .LBB61_537
-; CHECK-RV64-NEXT: j .LBB61_11
+; CHECK-RV64-NEXT: j .LBB61_10
; CHECK-RV64-NEXT: .LBB61_537: # %cond.load37
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 11, e8, m1, tu, ma
@@ -12885,9 +11877,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 52
-; CHECK-RV64-NEXT: bltz a1, .LBB61_538
-; CHECK-RV64-NEXT: j .LBB61_12
+; CHECK-RV64-NEXT: j .LBB61_11
; CHECK-RV64-NEXT: .LBB61_538: # %cond.load41
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 12, e8, m1, tu, ma
@@ -12897,9 +11887,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 51
-; CHECK-RV64-NEXT: bltz a1, .LBB61_539
-; CHECK-RV64-NEXT: j .LBB61_13
+; CHECK-RV64-NEXT: j .LBB61_12
; CHECK-RV64-NEXT: .LBB61_539: # %cond.load45
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 13, e8, m1, tu, ma
@@ -12909,9 +11897,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 50
-; CHECK-RV64-NEXT: bltz a1, .LBB61_540
-; CHECK-RV64-NEXT: j .LBB61_14
+; CHECK-RV64-NEXT: j .LBB61_13
; CHECK-RV64-NEXT: .LBB61_540: # %cond.load49
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 14, e8, m1, tu, ma
@@ -12921,9 +11907,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 49
-; CHECK-RV64-NEXT: bltz a1, .LBB61_541
-; CHECK-RV64-NEXT: j .LBB61_15
+; CHECK-RV64-NEXT: j .LBB61_14
; CHECK-RV64-NEXT: .LBB61_541: # %cond.load53
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 15, e8, m1, tu, ma
@@ -12933,9 +11917,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 48
-; CHECK-RV64-NEXT: bltz a1, .LBB61_542
-; CHECK-RV64-NEXT: j .LBB61_16
+; CHECK-RV64-NEXT: j .LBB61_15
; CHECK-RV64-NEXT: .LBB61_542: # %cond.load57
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 16, e8, m1, tu, ma
@@ -12945,9 +11927,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 47
-; CHECK-RV64-NEXT: bltz a1, .LBB61_543
-; CHECK-RV64-NEXT: j .LBB61_17
+; CHECK-RV64-NEXT: j .LBB61_16
; CHECK-RV64-NEXT: .LBB61_543: # %cond.load61
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 17, e8, m1, tu, ma
@@ -12957,9 +11937,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 46
-; CHECK-RV64-NEXT: bltz a1, .LBB61_544
-; CHECK-RV64-NEXT: j .LBB61_18
+; CHECK-RV64-NEXT: j .LBB61_17
; CHECK-RV64-NEXT: .LBB61_544: # %cond.load65
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 18, e8, m1, tu, ma
@@ -12969,9 +11947,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 45
-; CHECK-RV64-NEXT: bltz a1, .LBB61_545
-; CHECK-RV64-NEXT: j .LBB61_19
+; CHECK-RV64-NEXT: j .LBB61_18
; CHECK-RV64-NEXT: .LBB61_545: # %cond.load69
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 19, e8, m1, tu, ma
@@ -12981,9 +11957,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 44
-; CHECK-RV64-NEXT: bltz a1, .LBB61_546
-; CHECK-RV64-NEXT: j .LBB61_20
+; CHECK-RV64-NEXT: j .LBB61_19
; CHECK-RV64-NEXT: .LBB61_546: # %cond.load73
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 20, e8, m1, tu, ma
@@ -12993,9 +11967,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 43
-; CHECK-RV64-NEXT: bltz a1, .LBB61_547
-; CHECK-RV64-NEXT: j .LBB61_21
+; CHECK-RV64-NEXT: j .LBB61_20
; CHECK-RV64-NEXT: .LBB61_547: # %cond.load77
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 21, e8, m1, tu, ma
@@ -13005,9 +11977,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 42
-; CHECK-RV64-NEXT: bltz a1, .LBB61_548
-; CHECK-RV64-NEXT: j .LBB61_22
+; CHECK-RV64-NEXT: j .LBB61_21
; CHECK-RV64-NEXT: .LBB61_548: # %cond.load81
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 22, e8, m1, tu, ma
@@ -13017,9 +11987,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 41
-; CHECK-RV64-NEXT: bltz a1, .LBB61_549
-; CHECK-RV64-NEXT: j .LBB61_23
+; CHECK-RV64-NEXT: j .LBB61_22
; CHECK-RV64-NEXT: .LBB61_549: # %cond.load85
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 23, e8, m1, tu, ma
@@ -13029,9 +11997,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 40
-; CHECK-RV64-NEXT: bltz a1, .LBB61_550
-; CHECK-RV64-NEXT: j .LBB61_24
+; CHECK-RV64-NEXT: j .LBB61_23
; CHECK-RV64-NEXT: .LBB61_550: # %cond.load89
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 24, e8, m1, tu, ma
@@ -13041,9 +12007,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 39
-; CHECK-RV64-NEXT: bltz a1, .LBB61_551
-; CHECK-RV64-NEXT: j .LBB61_25
+; CHECK-RV64-NEXT: j .LBB61_24
; CHECK-RV64-NEXT: .LBB61_551: # %cond.load93
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 25, e8, m1, tu, ma
@@ -13053,9 +12017,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 38
-; CHECK-RV64-NEXT: bltz a1, .LBB61_552
-; CHECK-RV64-NEXT: j .LBB61_26
+; CHECK-RV64-NEXT: j .LBB61_25
; CHECK-RV64-NEXT: .LBB61_552: # %cond.load97
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 26, e8, m1, tu, ma
@@ -13065,9 +12027,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 37
-; CHECK-RV64-NEXT: bltz a1, .LBB61_553
-; CHECK-RV64-NEXT: j .LBB61_27
+; CHECK-RV64-NEXT: j .LBB61_26
; CHECK-RV64-NEXT: .LBB61_553: # %cond.load101
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 27, e8, m1, tu, ma
@@ -13077,9 +12037,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 36
-; CHECK-RV64-NEXT: bltz a1, .LBB61_554
-; CHECK-RV64-NEXT: j .LBB61_28
+; CHECK-RV64-NEXT: j .LBB61_27
; CHECK-RV64-NEXT: .LBB61_554: # %cond.load105
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 28, e8, m1, tu, ma
@@ -13089,9 +12047,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 35
-; CHECK-RV64-NEXT: bltz a1, .LBB61_555
-; CHECK-RV64-NEXT: j .LBB61_29
+; CHECK-RV64-NEXT: j .LBB61_28
; CHECK-RV64-NEXT: .LBB61_555: # %cond.load109
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 29, e8, m1, tu, ma
@@ -13101,9 +12057,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 34
-; CHECK-RV64-NEXT: bltz a1, .LBB61_556
-; CHECK-RV64-NEXT: j .LBB61_30
+; CHECK-RV64-NEXT: j .LBB61_29
; CHECK-RV64-NEXT: .LBB61_556: # %cond.load113
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 30, e8, m1, tu, ma
@@ -13113,9 +12067,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 33
-; CHECK-RV64-NEXT: bltz a1, .LBB61_557
-; CHECK-RV64-NEXT: j .LBB61_31
+; CHECK-RV64-NEXT: j .LBB61_30
; CHECK-RV64-NEXT: .LBB61_557: # %cond.load117
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 31, e8, m1, tu, ma
@@ -13125,9 +12077,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 32
-; CHECK-RV64-NEXT: bltz a1, .LBB61_558
-; CHECK-RV64-NEXT: j .LBB61_32
+; CHECK-RV64-NEXT: j .LBB61_31
; CHECK-RV64-NEXT: .LBB61_558: # %cond.load121
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13139,9 +12089,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 31
-; CHECK-RV64-NEXT: bltz a1, .LBB61_559
-; CHECK-RV64-NEXT: j .LBB61_33
+; CHECK-RV64-NEXT: j .LBB61_32
; CHECK-RV64-NEXT: .LBB61_559: # %cond.load125
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13154,9 +12102,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 30
-; CHECK-RV64-NEXT: bltz a1, .LBB61_560
-; CHECK-RV64-NEXT: j .LBB61_34
+; CHECK-RV64-NEXT: j .LBB61_33
; CHECK-RV64-NEXT: .LBB61_560: # %cond.load129
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13169,9 +12115,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 29
-; CHECK-RV64-NEXT: bltz a1, .LBB61_561
-; CHECK-RV64-NEXT: j .LBB61_35
+; CHECK-RV64-NEXT: j .LBB61_34
; CHECK-RV64-NEXT: .LBB61_561: # %cond.load133
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13184,9 +12128,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 28
-; CHECK-RV64-NEXT: bltz a1, .LBB61_562
-; CHECK-RV64-NEXT: j .LBB61_36
+; CHECK-RV64-NEXT: j .LBB61_35
; CHECK-RV64-NEXT: .LBB61_562: # %cond.load137
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13199,9 +12141,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 27
-; CHECK-RV64-NEXT: bltz a1, .LBB61_563
-; CHECK-RV64-NEXT: j .LBB61_37
+; CHECK-RV64-NEXT: j .LBB61_36
; CHECK-RV64-NEXT: .LBB61_563: # %cond.load141
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13214,9 +12154,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 26
-; CHECK-RV64-NEXT: bltz a1, .LBB61_564
-; CHECK-RV64-NEXT: j .LBB61_38
+; CHECK-RV64-NEXT: j .LBB61_37
; CHECK-RV64-NEXT: .LBB61_564: # %cond.load145
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13229,9 +12167,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 25
-; CHECK-RV64-NEXT: bltz a1, .LBB61_565
-; CHECK-RV64-NEXT: j .LBB61_39
+; CHECK-RV64-NEXT: j .LBB61_38
; CHECK-RV64-NEXT: .LBB61_565: # %cond.load149
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13244,9 +12180,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 24
-; CHECK-RV64-NEXT: bltz a1, .LBB61_566
-; CHECK-RV64-NEXT: j .LBB61_40
+; CHECK-RV64-NEXT: j .LBB61_39
; CHECK-RV64-NEXT: .LBB61_566: # %cond.load153
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13259,9 +12193,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 23
-; CHECK-RV64-NEXT: bltz a1, .LBB61_567
-; CHECK-RV64-NEXT: j .LBB61_41
+; CHECK-RV64-NEXT: j .LBB61_40
; CHECK-RV64-NEXT: .LBB61_567: # %cond.load157
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13274,9 +12206,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 22
-; CHECK-RV64-NEXT: bltz a1, .LBB61_568
-; CHECK-RV64-NEXT: j .LBB61_42
+; CHECK-RV64-NEXT: j .LBB61_41
; CHECK-RV64-NEXT: .LBB61_568: # %cond.load161
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13289,9 +12219,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 21
-; CHECK-RV64-NEXT: bltz a1, .LBB61_569
-; CHECK-RV64-NEXT: j .LBB61_43
+; CHECK-RV64-NEXT: j .LBB61_42
; CHECK-RV64-NEXT: .LBB61_569: # %cond.load165
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13304,9 +12232,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 20
-; CHECK-RV64-NEXT: bltz a1, .LBB61_570
-; CHECK-RV64-NEXT: j .LBB61_44
+; CHECK-RV64-NEXT: j .LBB61_43
; CHECK-RV64-NEXT: .LBB61_570: # %cond.load169
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13319,9 +12245,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 19
-; CHECK-RV64-NEXT: bltz a1, .LBB61_571
-; CHECK-RV64-NEXT: j .LBB61_45
+; CHECK-RV64-NEXT: j .LBB61_44
; CHECK-RV64-NEXT: .LBB61_571: # %cond.load173
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13334,9 +12258,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 18
-; CHECK-RV64-NEXT: bltz a1, .LBB61_572
-; CHECK-RV64-NEXT: j .LBB61_46
+; CHECK-RV64-NEXT: j .LBB61_45
; CHECK-RV64-NEXT: .LBB61_572: # %cond.load177
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13349,9 +12271,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 17
-; CHECK-RV64-NEXT: bltz a1, .LBB61_573
-; CHECK-RV64-NEXT: j .LBB61_47
+; CHECK-RV64-NEXT: j .LBB61_46
; CHECK-RV64-NEXT: .LBB61_573: # %cond.load181
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13364,9 +12284,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 16
-; CHECK-RV64-NEXT: bltz a1, .LBB61_574
-; CHECK-RV64-NEXT: j .LBB61_48
+; CHECK-RV64-NEXT: j .LBB61_47
; CHECK-RV64-NEXT: .LBB61_574: # %cond.load185
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13379,9 +12297,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 15
-; CHECK-RV64-NEXT: bltz a1, .LBB61_575
-; CHECK-RV64-NEXT: j .LBB61_49
+; CHECK-RV64-NEXT: j .LBB61_48
; CHECK-RV64-NEXT: .LBB61_575: # %cond.load189
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13394,9 +12310,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 14
-; CHECK-RV64-NEXT: bltz a1, .LBB61_576
-; CHECK-RV64-NEXT: j .LBB61_50
+; CHECK-RV64-NEXT: j .LBB61_49
; CHECK-RV64-NEXT: .LBB61_576: # %cond.load193
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13409,9 +12323,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 13
-; CHECK-RV64-NEXT: bltz a1, .LBB61_577
-; CHECK-RV64-NEXT: j .LBB61_51
+; CHECK-RV64-NEXT: j .LBB61_50
; CHECK-RV64-NEXT: .LBB61_577: # %cond.load197
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13424,9 +12336,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 12
-; CHECK-RV64-NEXT: bltz a1, .LBB61_578
-; CHECK-RV64-NEXT: j .LBB61_52
+; CHECK-RV64-NEXT: j .LBB61_51
; CHECK-RV64-NEXT: .LBB61_578: # %cond.load201
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13439,9 +12349,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 11
-; CHECK-RV64-NEXT: bltz a1, .LBB61_579
-; CHECK-RV64-NEXT: j .LBB61_53
+; CHECK-RV64-NEXT: j .LBB61_52
; CHECK-RV64-NEXT: .LBB61_579: # %cond.load205
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13454,9 +12362,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 10
-; CHECK-RV64-NEXT: bltz a1, .LBB61_580
-; CHECK-RV64-NEXT: j .LBB61_54
+; CHECK-RV64-NEXT: j .LBB61_53
; CHECK-RV64-NEXT: .LBB61_580: # %cond.load209
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13469,9 +12375,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 9
-; CHECK-RV64-NEXT: bltz a1, .LBB61_581
-; CHECK-RV64-NEXT: j .LBB61_55
+; CHECK-RV64-NEXT: j .LBB61_54
; CHECK-RV64-NEXT: .LBB61_581: # %cond.load213
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13484,9 +12388,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 8
-; CHECK-RV64-NEXT: bltz a1, .LBB61_582
-; CHECK-RV64-NEXT: j .LBB61_56
+; CHECK-RV64-NEXT: j .LBB61_55
; CHECK-RV64-NEXT: .LBB61_582: # %cond.load217
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13499,9 +12401,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 7
-; CHECK-RV64-NEXT: bltz a1, .LBB61_583
-; CHECK-RV64-NEXT: j .LBB61_57
+; CHECK-RV64-NEXT: j .LBB61_56
; CHECK-RV64-NEXT: .LBB61_583: # %cond.load221
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13514,9 +12414,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 6
-; CHECK-RV64-NEXT: bltz a1, .LBB61_584
-; CHECK-RV64-NEXT: j .LBB61_58
+; CHECK-RV64-NEXT: j .LBB61_57
; CHECK-RV64-NEXT: .LBB61_584: # %cond.load225
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13529,9 +12427,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 5
-; CHECK-RV64-NEXT: bltz a1, .LBB61_585
-; CHECK-RV64-NEXT: j .LBB61_59
+; CHECK-RV64-NEXT: j .LBB61_58
; CHECK-RV64-NEXT: .LBB61_585: # %cond.load229
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13544,9 +12440,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 4
-; CHECK-RV64-NEXT: bltz a1, .LBB61_586
-; CHECK-RV64-NEXT: j .LBB61_60
+; CHECK-RV64-NEXT: j .LBB61_59
; CHECK-RV64-NEXT: .LBB61_586: # %cond.load233
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13559,9 +12453,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 3
-; CHECK-RV64-NEXT: bltz a1, .LBB61_587
-; CHECK-RV64-NEXT: j .LBB61_61
+; CHECK-RV64-NEXT: j .LBB61_60
; CHECK-RV64-NEXT: .LBB61_587: # %cond.load237
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13574,11 +12466,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 2
-; CHECK-RV64-NEXT: bgez a1, .LBB61_1025
-; CHECK-RV64-NEXT: j .LBB61_62
-; CHECK-RV64-NEXT: .LBB61_1025: # %cond.load237
-; CHECK-RV64-NEXT: j .LBB61_63
+; CHECK-RV64-NEXT: j .LBB61_61
; CHECK-RV64-NEXT: .LBB61_588: # %cond.load249
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vmv8r.v v16, v8
@@ -13590,9 +12478,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 1
-; CHECK-RV64-NEXT: bnez a2, .LBB61_589
-; CHECK-RV64-NEXT: j .LBB61_67
+; CHECK-RV64-NEXT: j .LBB61_66
; CHECK-RV64-NEXT: .LBB61_589: # %cond.load253
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13605,9 +12491,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 2
-; CHECK-RV64-NEXT: bnez a2, .LBB61_590
-; CHECK-RV64-NEXT: j .LBB61_68
+; CHECK-RV64-NEXT: j .LBB61_67
; CHECK-RV64-NEXT: .LBB61_590: # %cond.load257
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13620,9 +12504,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 4
-; CHECK-RV64-NEXT: bnez a2, .LBB61_591
-; CHECK-RV64-NEXT: j .LBB61_69
+; CHECK-RV64-NEXT: j .LBB61_68
; CHECK-RV64-NEXT: .LBB61_591: # %cond.load261
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13635,9 +12517,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 8
-; CHECK-RV64-NEXT: bnez a2, .LBB61_592
-; CHECK-RV64-NEXT: j .LBB61_70
+; CHECK-RV64-NEXT: j .LBB61_69
; CHECK-RV64-NEXT: .LBB61_592: # %cond.load265
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13650,9 +12530,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 16
-; CHECK-RV64-NEXT: bnez a2, .LBB61_593
-; CHECK-RV64-NEXT: j .LBB61_71
+; CHECK-RV64-NEXT: j .LBB61_70
; CHECK-RV64-NEXT: .LBB61_593: # %cond.load269
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13665,9 +12543,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 32
-; CHECK-RV64-NEXT: bnez a2, .LBB61_594
-; CHECK-RV64-NEXT: j .LBB61_72
+; CHECK-RV64-NEXT: j .LBB61_71
; CHECK-RV64-NEXT: .LBB61_594: # %cond.load273
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13680,9 +12556,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 64
-; CHECK-RV64-NEXT: bnez a2, .LBB61_595
-; CHECK-RV64-NEXT: j .LBB61_73
+; CHECK-RV64-NEXT: j .LBB61_72
; CHECK-RV64-NEXT: .LBB61_595: # %cond.load277
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13695,9 +12569,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 128
-; CHECK-RV64-NEXT: bnez a2, .LBB61_596
-; CHECK-RV64-NEXT: j .LBB61_74
+; CHECK-RV64-NEXT: j .LBB61_73
; CHECK-RV64-NEXT: .LBB61_596: # %cond.load281
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13710,9 +12582,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 256
-; CHECK-RV64-NEXT: bnez a2, .LBB61_597
-; CHECK-RV64-NEXT: j .LBB61_75
+; CHECK-RV64-NEXT: j .LBB61_74
; CHECK-RV64-NEXT: .LBB61_597: # %cond.load285
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13725,9 +12595,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 512
-; CHECK-RV64-NEXT: bnez a2, .LBB61_598
-; CHECK-RV64-NEXT: j .LBB61_76
+; CHECK-RV64-NEXT: j .LBB61_75
; CHECK-RV64-NEXT: .LBB61_598: # %cond.load289
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13740,9 +12608,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 1024
-; CHECK-RV64-NEXT: bnez a2, .LBB61_599
-; CHECK-RV64-NEXT: j .LBB61_77
+; CHECK-RV64-NEXT: j .LBB61_76
; CHECK-RV64-NEXT: .LBB61_599: # %cond.load293
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13755,9 +12621,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 52
-; CHECK-RV64-NEXT: bltz a2, .LBB61_600
-; CHECK-RV64-NEXT: j .LBB61_78
+; CHECK-RV64-NEXT: j .LBB61_77
; CHECK-RV64-NEXT: .LBB61_600: # %cond.load297
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13770,9 +12634,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 51
-; CHECK-RV64-NEXT: bltz a2, .LBB61_601
-; CHECK-RV64-NEXT: j .LBB61_79
+; CHECK-RV64-NEXT: j .LBB61_78
; CHECK-RV64-NEXT: .LBB61_601: # %cond.load301
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13785,9 +12647,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 50
-; CHECK-RV64-NEXT: bltz a2, .LBB61_602
-; CHECK-RV64-NEXT: j .LBB61_80
+; CHECK-RV64-NEXT: j .LBB61_79
; CHECK-RV64-NEXT: .LBB61_602: # %cond.load305
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13800,9 +12660,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 49
-; CHECK-RV64-NEXT: bltz a2, .LBB61_603
-; CHECK-RV64-NEXT: j .LBB61_81
+; CHECK-RV64-NEXT: j .LBB61_80
; CHECK-RV64-NEXT: .LBB61_603: # %cond.load309
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13815,9 +12673,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 48
-; CHECK-RV64-NEXT: bltz a2, .LBB61_604
-; CHECK-RV64-NEXT: j .LBB61_82
+; CHECK-RV64-NEXT: j .LBB61_81
; CHECK-RV64-NEXT: .LBB61_604: # %cond.load313
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13830,9 +12686,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 47
-; CHECK-RV64-NEXT: bltz a2, .LBB61_605
-; CHECK-RV64-NEXT: j .LBB61_83
+; CHECK-RV64-NEXT: j .LBB61_82
; CHECK-RV64-NEXT: .LBB61_605: # %cond.load317
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13845,9 +12699,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 46
-; CHECK-RV64-NEXT: bltz a2, .LBB61_606
-; CHECK-RV64-NEXT: j .LBB61_84
+; CHECK-RV64-NEXT: j .LBB61_83
; CHECK-RV64-NEXT: .LBB61_606: # %cond.load321
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13860,9 +12712,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 45
-; CHECK-RV64-NEXT: bltz a2, .LBB61_607
-; CHECK-RV64-NEXT: j .LBB61_85
+; CHECK-RV64-NEXT: j .LBB61_84
; CHECK-RV64-NEXT: .LBB61_607: # %cond.load325
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13875,9 +12725,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 44
-; CHECK-RV64-NEXT: bltz a2, .LBB61_608
-; CHECK-RV64-NEXT: j .LBB61_86
+; CHECK-RV64-NEXT: j .LBB61_85
; CHECK-RV64-NEXT: .LBB61_608: # %cond.load329
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13890,9 +12738,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 43
-; CHECK-RV64-NEXT: bltz a2, .LBB61_609
-; CHECK-RV64-NEXT: j .LBB61_87
+; CHECK-RV64-NEXT: j .LBB61_86
; CHECK-RV64-NEXT: .LBB61_609: # %cond.load333
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13905,9 +12751,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 42
-; CHECK-RV64-NEXT: bltz a2, .LBB61_610
-; CHECK-RV64-NEXT: j .LBB61_88
+; CHECK-RV64-NEXT: j .LBB61_87
; CHECK-RV64-NEXT: .LBB61_610: # %cond.load337
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13920,9 +12764,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 41
-; CHECK-RV64-NEXT: bltz a2, .LBB61_611
-; CHECK-RV64-NEXT: j .LBB61_89
+; CHECK-RV64-NEXT: j .LBB61_88
; CHECK-RV64-NEXT: .LBB61_611: # %cond.load341
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13935,9 +12777,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 40
-; CHECK-RV64-NEXT: bltz a2, .LBB61_612
-; CHECK-RV64-NEXT: j .LBB61_90
+; CHECK-RV64-NEXT: j .LBB61_89
; CHECK-RV64-NEXT: .LBB61_612: # %cond.load345
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13950,9 +12790,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 39
-; CHECK-RV64-NEXT: bltz a2, .LBB61_613
-; CHECK-RV64-NEXT: j .LBB61_91
+; CHECK-RV64-NEXT: j .LBB61_90
; CHECK-RV64-NEXT: .LBB61_613: # %cond.load349
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13965,9 +12803,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 38
-; CHECK-RV64-NEXT: bltz a2, .LBB61_614
-; CHECK-RV64-NEXT: j .LBB61_92
+; CHECK-RV64-NEXT: j .LBB61_91
; CHECK-RV64-NEXT: .LBB61_614: # %cond.load353
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13980,9 +12816,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 37
-; CHECK-RV64-NEXT: bltz a2, .LBB61_615
-; CHECK-RV64-NEXT: j .LBB61_93
+; CHECK-RV64-NEXT: j .LBB61_92
; CHECK-RV64-NEXT: .LBB61_615: # %cond.load357
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13995,9 +12829,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 36
-; CHECK-RV64-NEXT: bltz a2, .LBB61_616
-; CHECK-RV64-NEXT: j .LBB61_94
+; CHECK-RV64-NEXT: j .LBB61_93
; CHECK-RV64-NEXT: .LBB61_616: # %cond.load361
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14010,9 +12842,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 35
-; CHECK-RV64-NEXT: bltz a2, .LBB61_617
-; CHECK-RV64-NEXT: j .LBB61_95
+; CHECK-RV64-NEXT: j .LBB61_94
; CHECK-RV64-NEXT: .LBB61_617: # %cond.load365
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14025,9 +12855,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 34
-; CHECK-RV64-NEXT: bltz a2, .LBB61_618
-; CHECK-RV64-NEXT: j .LBB61_96
+; CHECK-RV64-NEXT: j .LBB61_95
; CHECK-RV64-NEXT: .LBB61_618: # %cond.load369
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14040,9 +12868,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 33
-; CHECK-RV64-NEXT: bltz a2, .LBB61_619
-; CHECK-RV64-NEXT: j .LBB61_97
+; CHECK-RV64-NEXT: j .LBB61_96
; CHECK-RV64-NEXT: .LBB61_619: # %cond.load373
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14055,9 +12881,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 32
-; CHECK-RV64-NEXT: bltz a2, .LBB61_620
-; CHECK-RV64-NEXT: j .LBB61_98
+; CHECK-RV64-NEXT: j .LBB61_97
; CHECK-RV64-NEXT: .LBB61_620: # %cond.load377
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14070,9 +12894,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 31
-; CHECK-RV64-NEXT: bltz a2, .LBB61_621
-; CHECK-RV64-NEXT: j .LBB61_99
+; CHECK-RV64-NEXT: j .LBB61_98
; CHECK-RV64-NEXT: .LBB61_621: # %cond.load381
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14085,9 +12907,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 30
-; CHECK-RV64-NEXT: bltz a2, .LBB61_622
-; CHECK-RV64-NEXT: j .LBB61_100
+; CHECK-RV64-NEXT: j .LBB61_99
; CHECK-RV64-NEXT: .LBB61_622: # %cond.load385
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14100,9 +12920,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 29
-; CHECK-RV64-NEXT: bltz a2, .LBB61_623
-; CHECK-RV64-NEXT: j .LBB61_101
+; CHECK-RV64-NEXT: j .LBB61_100
; CHECK-RV64-NEXT: .LBB61_623: # %cond.load389
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14115,9 +12933,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 28
-; CHECK-RV64-NEXT: bltz a2, .LBB61_624
-; CHECK-RV64-NEXT: j .LBB61_102
+; CHECK-RV64-NEXT: j .LBB61_101
; CHECK-RV64-NEXT: .LBB61_624: # %cond.load393
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14130,9 +12946,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 27
-; CHECK-RV64-NEXT: bltz a2, .LBB61_625
-; CHECK-RV64-NEXT: j .LBB61_103
+; CHECK-RV64-NEXT: j .LBB61_102
; CHECK-RV64-NEXT: .LBB61_625: # %cond.load397
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14145,9 +12959,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 26
-; CHECK-RV64-NEXT: bltz a2, .LBB61_626
-; CHECK-RV64-NEXT: j .LBB61_104
+; CHECK-RV64-NEXT: j .LBB61_103
; CHECK-RV64-NEXT: .LBB61_626: # %cond.load401
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14160,9 +12972,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 25
-; CHECK-RV64-NEXT: bltz a2, .LBB61_627
-; CHECK-RV64-NEXT: j .LBB61_105
+; CHECK-RV64-NEXT: j .LBB61_104
; CHECK-RV64-NEXT: .LBB61_627: # %cond.load405
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14175,9 +12985,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 24
-; CHECK-RV64-NEXT: bltz a2, .LBB61_628
-; CHECK-RV64-NEXT: j .LBB61_106
+; CHECK-RV64-NEXT: j .LBB61_105
; CHECK-RV64-NEXT: .LBB61_628: # %cond.load409
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14190,9 +12998,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 23
-; CHECK-RV64-NEXT: bltz a2, .LBB61_629
-; CHECK-RV64-NEXT: j .LBB61_107
+; CHECK-RV64-NEXT: j .LBB61_106
; CHECK-RV64-NEXT: .LBB61_629: # %cond.load413
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14205,9 +13011,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 22
-; CHECK-RV64-NEXT: bltz a2, .LBB61_630
-; CHECK-RV64-NEXT: j .LBB61_108
+; CHECK-RV64-NEXT: j .LBB61_107
; CHECK-RV64-NEXT: .LBB61_630: # %cond.load417
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14220,9 +13024,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 21
-; CHECK-RV64-NEXT: bltz a2, .LBB61_631
-; CHECK-RV64-NEXT: j .LBB61_109
+; CHECK-RV64-NEXT: j .LBB61_108
; CHECK-RV64-NEXT: .LBB61_631: # %cond.load421
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14235,9 +13037,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 20
-; CHECK-RV64-NEXT: bltz a2, .LBB61_632
-; CHECK-RV64-NEXT: j .LBB61_110
+; CHECK-RV64-NEXT: j .LBB61_109
; CHECK-RV64-NEXT: .LBB61_632: # %cond.load425
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14250,9 +13050,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 19
-; CHECK-RV64-NEXT: bltz a2, .LBB61_633
-; CHECK-RV64-NEXT: j .LBB61_111
+; CHECK-RV64-NEXT: j .LBB61_110
; CHECK-RV64-NEXT: .LBB61_633: # %cond.load429
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14265,9 +13063,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 18
-; CHECK-RV64-NEXT: bltz a2, .LBB61_634
-; CHECK-RV64-NEXT: j .LBB61_112
+; CHECK-RV64-NEXT: j .LBB61_111
; CHECK-RV64-NEXT: .LBB61_634: # %cond.load433
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14280,9 +13076,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 17
-; CHECK-RV64-NEXT: bltz a2, .LBB61_635
-; CHECK-RV64-NEXT: j .LBB61_113
+; CHECK-RV64-NEXT: j .LBB61_112
; CHECK-RV64-NEXT: .LBB61_635: # %cond.load437
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14295,9 +13089,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 16
-; CHECK-RV64-NEXT: bltz a2, .LBB61_636
-; CHECK-RV64-NEXT: j .LBB61_114
+; CHECK-RV64-NEXT: j .LBB61_113
; CHECK-RV64-NEXT: .LBB61_636: # %cond.load441
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14310,9 +13102,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 15
-; CHECK-RV64-NEXT: bltz a2, .LBB61_637
-; CHECK-RV64-NEXT: j .LBB61_115
+; CHECK-RV64-NEXT: j .LBB61_114
; CHECK-RV64-NEXT: .LBB61_637: # %cond.load445
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14325,9 +13115,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 14
-; CHECK-RV64-NEXT: bltz a2, .LBB61_638
-; CHECK-RV64-NEXT: j .LBB61_116
+; CHECK-RV64-NEXT: j .LBB61_115
; CHECK-RV64-NEXT: .LBB61_638: # %cond.load449
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14340,9 +13128,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 13
-; CHECK-RV64-NEXT: bltz a2, .LBB61_639
-; CHECK-RV64-NEXT: j .LBB61_117
+; CHECK-RV64-NEXT: j .LBB61_116
; CHECK-RV64-NEXT: .LBB61_639: # %cond.load453
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14355,9 +13141,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 12
-; CHECK-RV64-NEXT: bltz a2, .LBB61_640
-; CHECK-RV64-NEXT: j .LBB61_118
+; CHECK-RV64-NEXT: j .LBB61_117
; CHECK-RV64-NEXT: .LBB61_640: # %cond.load457
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14370,9 +13154,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 11
-; CHECK-RV64-NEXT: bltz a2, .LBB61_641
-; CHECK-RV64-NEXT: j .LBB61_119
+; CHECK-RV64-NEXT: j .LBB61_118
; CHECK-RV64-NEXT: .LBB61_641: # %cond.load461
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14385,9 +13167,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 10
-; CHECK-RV64-NEXT: bltz a2, .LBB61_642
-; CHECK-RV64-NEXT: j .LBB61_120
+; CHECK-RV64-NEXT: j .LBB61_119
; CHECK-RV64-NEXT: .LBB61_642: # %cond.load465
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14400,9 +13180,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 9
-; CHECK-RV64-NEXT: bltz a2, .LBB61_643
-; CHECK-RV64-NEXT: j .LBB61_121
+; CHECK-RV64-NEXT: j .LBB61_120
; CHECK-RV64-NEXT: .LBB61_643: # %cond.load469
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14415,9 +13193,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 8
-; CHECK-RV64-NEXT: bltz a2, .LBB61_644
-; CHECK-RV64-NEXT: j .LBB61_122
+; CHECK-RV64-NEXT: j .LBB61_121
; CHECK-RV64-NEXT: .LBB61_644: # %cond.load473
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14430,9 +13206,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 7
-; CHECK-RV64-NEXT: bltz a2, .LBB61_645
-; CHECK-RV64-NEXT: j .LBB61_123
+; CHECK-RV64-NEXT: j .LBB61_122
; CHECK-RV64-NEXT: .LBB61_645: # %cond.load477
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14445,9 +13219,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 6
-; CHECK-RV64-NEXT: bltz a2, .LBB61_646
-; CHECK-RV64-NEXT: j .LBB61_124
+; CHECK-RV64-NEXT: j .LBB61_123
; CHECK-RV64-NEXT: .LBB61_646: # %cond.load481
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14460,9 +13232,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 5
-; CHECK-RV64-NEXT: bltz a2, .LBB61_647
-; CHECK-RV64-NEXT: j .LBB61_125
+; CHECK-RV64-NEXT: j .LBB61_124
; CHECK-RV64-NEXT: .LBB61_647: # %cond.load485
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14475,9 +13245,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 4
-; CHECK-RV64-NEXT: bltz a2, .LBB61_648
-; CHECK-RV64-NEXT: j .LBB61_126
+; CHECK-RV64-NEXT: j .LBB61_125
; CHECK-RV64-NEXT: .LBB61_648: # %cond.load489
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14490,9 +13258,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 3
-; CHECK-RV64-NEXT: bltz a2, .LBB61_649
-; CHECK-RV64-NEXT: j .LBB61_127
+; CHECK-RV64-NEXT: j .LBB61_126
; CHECK-RV64-NEXT: .LBB61_649: # %cond.load493
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14505,11 +13271,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 2
-; CHECK-RV64-NEXT: bgez a2, .LBB61_1026
-; CHECK-RV64-NEXT: j .LBB61_128
-; CHECK-RV64-NEXT: .LBB61_1026: # %cond.load493
-; CHECK-RV64-NEXT: j .LBB61_129
+; CHECK-RV64-NEXT: j .LBB61_127
; CHECK-RV64-NEXT: .LBB61_650: # %cond.load505
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vmv8r.v v16, v8
@@ -14521,9 +13283,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 1
-; CHECK-RV64-NEXT: bnez a1, .LBB61_651
-; CHECK-RV64-NEXT: j .LBB61_133
+; CHECK-RV64-NEXT: j .LBB61_132
; CHECK-RV64-NEXT: .LBB61_651: # %cond.load509
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14536,9 +13296,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 2
-; CHECK-RV64-NEXT: bnez a1, .LBB61_652
-; CHECK-RV64-NEXT: j .LBB61_134
+; CHECK-RV64-NEXT: j .LBB61_133
; CHECK-RV64-NEXT: .LBB61_652: # %cond.load513
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14551,9 +13309,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 4
-; CHECK-RV64-NEXT: bnez a1, .LBB61_653
-; CHECK-RV64-NEXT: j .LBB61_135
+; CHECK-RV64-NEXT: j .LBB61_134
; CHECK-RV64-NEXT: .LBB61_653: # %cond.load517
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14566,9 +13322,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 8
-; CHECK-RV64-NEXT: bnez a1, .LBB61_654
-; CHECK-RV64-NEXT: j .LBB61_136
+; CHECK-RV64-NEXT: j .LBB61_135
; CHECK-RV64-NEXT: .LBB61_654: # %cond.load521
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14581,9 +13335,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 16
-; CHECK-RV64-NEXT: bnez a1, .LBB61_655
-; CHECK-RV64-NEXT: j .LBB61_137
+; CHECK-RV64-NEXT: j .LBB61_136
; CHECK-RV64-NEXT: .LBB61_655: # %cond.load525
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14596,9 +13348,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 32
-; CHECK-RV64-NEXT: bnez a1, .LBB61_656
-; CHECK-RV64-NEXT: j .LBB61_138
+; CHECK-RV64-NEXT: j .LBB61_137
; CHECK-RV64-NEXT: .LBB61_656: # %cond.load529
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14611,9 +13361,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 64
-; CHECK-RV64-NEXT: bnez a1, .LBB61_657
-; CHECK-RV64-NEXT: j .LBB61_139
+; CHECK-RV64-NEXT: j .LBB61_138
; CHECK-RV64-NEXT: .LBB61_657: # %cond.load533
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14626,9 +13374,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 128
-; CHECK-RV64-NEXT: bnez a1, .LBB61_658
-; CHECK-RV64-NEXT: j .LBB61_140
+; CHECK-RV64-NEXT: j .LBB61_139
; CHECK-RV64-NEXT: .LBB61_658: # %cond.load537
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14641,9 +13387,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 256
-; CHECK-RV64-NEXT: bnez a1, .LBB61_659
-; CHECK-RV64-NEXT: j .LBB61_141
+; CHECK-RV64-NEXT: j .LBB61_140
; CHECK-RV64-NEXT: .LBB61_659: # %cond.load541
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14656,9 +13400,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 512
-; CHECK-RV64-NEXT: bnez a1, .LBB61_660
-; CHECK-RV64-NEXT: j .LBB61_142
+; CHECK-RV64-NEXT: j .LBB61_141
; CHECK-RV64-NEXT: .LBB61_660: # %cond.load545
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14671,9 +13413,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 1024
-; CHECK-RV64-NEXT: bnez a1, .LBB61_661
-; CHECK-RV64-NEXT: j .LBB61_143
+; CHECK-RV64-NEXT: j .LBB61_142
; CHECK-RV64-NEXT: .LBB61_661: # %cond.load549
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14686,9 +13426,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 52
-; CHECK-RV64-NEXT: bltz a1, .LBB61_662
-; CHECK-RV64-NEXT: j .LBB61_144
+; CHECK-RV64-NEXT: j .LBB61_143
; CHECK-RV64-NEXT: .LBB61_662: # %cond.load553
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14701,9 +13439,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 51
-; CHECK-RV64-NEXT: bltz a1, .LBB61_663
-; CHECK-RV64-NEXT: j .LBB61_145
+; CHECK-RV64-NEXT: j .LBB61_144
; CHECK-RV64-NEXT: .LBB61_663: # %cond.load557
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14716,9 +13452,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 50
-; CHECK-RV64-NEXT: bltz a1, .LBB61_664
-; CHECK-RV64-NEXT: j .LBB61_146
+; CHECK-RV64-NEXT: j .LBB61_145
; CHECK-RV64-NEXT: .LBB61_664: # %cond.load561
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14731,9 +13465,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 49
-; CHECK-RV64-NEXT: bltz a1, .LBB61_665
-; CHECK-RV64-NEXT: j .LBB61_147
+; CHECK-RV64-NEXT: j .LBB61_146
; CHECK-RV64-NEXT: .LBB61_665: # %cond.load565
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14746,9 +13478,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 48
-; CHECK-RV64-NEXT: bltz a1, .LBB61_666
-; CHECK-RV64-NEXT: j .LBB61_148
+; CHECK-RV64-NEXT: j .LBB61_147
; CHECK-RV64-NEXT: .LBB61_666: # %cond.load569
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14761,9 +13491,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 47
-; CHECK-RV64-NEXT: bltz a1, .LBB61_667
-; CHECK-RV64-NEXT: j .LBB61_149
+; CHECK-RV64-NEXT: j .LBB61_148
; CHECK-RV64-NEXT: .LBB61_667: # %cond.load573
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14776,9 +13504,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 46
-; CHECK-RV64-NEXT: bltz a1, .LBB61_668
-; CHECK-RV64-NEXT: j .LBB61_150
+; CHECK-RV64-NEXT: j .LBB61_149
; CHECK-RV64-NEXT: .LBB61_668: # %cond.load577
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14791,9 +13517,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 45
-; CHECK-RV64-NEXT: bltz a1, .LBB61_669
-; CHECK-RV64-NEXT: j .LBB61_151
+; CHECK-RV64-NEXT: j .LBB61_150
; CHECK-RV64-NEXT: .LBB61_669: # %cond.load581
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14806,9 +13530,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 44
-; CHECK-RV64-NEXT: bltz a1, .LBB61_670
-; CHECK-RV64-NEXT: j .LBB61_152
+; CHECK-RV64-NEXT: j .LBB61_151
; CHECK-RV64-NEXT: .LBB61_670: # %cond.load585
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14821,9 +13543,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 43
-; CHECK-RV64-NEXT: bltz a1, .LBB61_671
-; CHECK-RV64-NEXT: j .LBB61_153
+; CHECK-RV64-NEXT: j .LBB61_152
; CHECK-RV64-NEXT: .LBB61_671: # %cond.load589
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14836,9 +13556,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 42
-; CHECK-RV64-NEXT: bltz a1, .LBB61_672
-; CHECK-RV64-NEXT: j .LBB61_154
+; CHECK-RV64-NEXT: j .LBB61_153
; CHECK-RV64-NEXT: .LBB61_672: # %cond.load593
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14851,9 +13569,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 41
-; CHECK-RV64-NEXT: bltz a1, .LBB61_673
-; CHECK-RV64-NEXT: j .LBB61_155
+; CHECK-RV64-NEXT: j .LBB61_154
; CHECK-RV64-NEXT: .LBB61_673: # %cond.load597
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14866,9 +13582,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 40
-; CHECK-RV64-NEXT: bltz a1, .LBB61_674
-; CHECK-RV64-NEXT: j .LBB61_156
+; CHECK-RV64-NEXT: j .LBB61_155
; CHECK-RV64-NEXT: .LBB61_674: # %cond.load601
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14881,9 +13595,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 39
-; CHECK-RV64-NEXT: bltz a1, .LBB61_675
-; CHECK-RV64-NEXT: j .LBB61_157
+; CHECK-RV64-NEXT: j .LBB61_156
; CHECK-RV64-NEXT: .LBB61_675: # %cond.load605
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14896,9 +13608,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 38
-; CHECK-RV64-NEXT: bltz a1, .LBB61_676
-; CHECK-RV64-NEXT: j .LBB61_158
+; CHECK-RV64-NEXT: j .LBB61_157
; CHECK-RV64-NEXT: .LBB61_676: # %cond.load609
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14911,9 +13621,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 37
-; CHECK-RV64-NEXT: bltz a1, .LBB61_677
-; CHECK-RV64-NEXT: j .LBB61_159
+; CHECK-RV64-NEXT: j .LBB61_158
; CHECK-RV64-NEXT: .LBB61_677: # %cond.load613
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14926,9 +13634,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 36
-; CHECK-RV64-NEXT: bltz a1, .LBB61_678
-; CHECK-RV64-NEXT: j .LBB61_160
+; CHECK-RV64-NEXT: j .LBB61_159
; CHECK-RV64-NEXT: .LBB61_678: # %cond.load617
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14941,9 +13647,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 35
-; CHECK-RV64-NEXT: bltz a1, .LBB61_679
-; CHECK-RV64-NEXT: j .LBB61_161
+; CHECK-RV64-NEXT: j .LBB61_160
; CHECK-RV64-NEXT: .LBB61_679: # %cond.load621
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14956,9 +13660,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 34
-; CHECK-RV64-NEXT: bltz a1, .LBB61_680
-; CHECK-RV64-NEXT: j .LBB61_162
+; CHECK-RV64-NEXT: j .LBB61_161
; CHECK-RV64-NEXT: .LBB61_680: # %cond.load625
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14971,9 +13673,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 33
-; CHECK-RV64-NEXT: bltz a1, .LBB61_681
-; CHECK-RV64-NEXT: j .LBB61_163
+; CHECK-RV64-NEXT: j .LBB61_162
; CHECK-RV64-NEXT: .LBB61_681: # %cond.load629
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14986,9 +13686,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 32
-; CHECK-RV64-NEXT: bltz a1, .LBB61_682
-; CHECK-RV64-NEXT: j .LBB61_164
+; CHECK-RV64-NEXT: j .LBB61_163
; CHECK-RV64-NEXT: .LBB61_682: # %cond.load633
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15001,9 +13699,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 31
-; CHECK-RV64-NEXT: bltz a1, .LBB61_683
-; CHECK-RV64-NEXT: j .LBB61_165
+; CHECK-RV64-NEXT: j .LBB61_164
; CHECK-RV64-NEXT: .LBB61_683: # %cond.load637
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15016,9 +13712,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 30
-; CHECK-RV64-NEXT: bltz a1, .LBB61_684
-; CHECK-RV64-NEXT: j .LBB61_166
+; CHECK-RV64-NEXT: j .LBB61_165
; CHECK-RV64-NEXT: .LBB61_684: # %cond.load641
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15031,9 +13725,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 29
-; CHECK-RV64-NEXT: bltz a1, .LBB61_685
-; CHECK-RV64-NEXT: j .LBB61_167
+; CHECK-RV64-NEXT: j .LBB61_166
; CHECK-RV64-NEXT: .LBB61_685: # %cond.load645
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15046,9 +13738,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 28
-; CHECK-RV64-NEXT: bltz a1, .LBB61_686
-; CHECK-RV64-NEXT: j .LBB61_168
+; CHECK-RV64-NEXT: j .LBB61_167
; CHECK-RV64-NEXT: .LBB61_686: # %cond.load649
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15061,9 +13751,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 27
-; CHECK-RV64-NEXT: bltz a1, .LBB61_687
-; CHECK-RV64-NEXT: j .LBB61_169
+; CHECK-RV64-NEXT: j .LBB61_168
; CHECK-RV64-NEXT: .LBB61_687: # %cond.load653
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15076,9 +13764,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 26
-; CHECK-RV64-NEXT: bltz a1, .LBB61_688
-; CHECK-RV64-NEXT: j .LBB61_170
+; CHECK-RV64-NEXT: j .LBB61_169
; CHECK-RV64-NEXT: .LBB61_688: # %cond.load657
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15091,9 +13777,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 25
-; CHECK-RV64-NEXT: bltz a1, .LBB61_689
-; CHECK-RV64-NEXT: j .LBB61_171
+; CHECK-RV64-NEXT: j .LBB61_170
; CHECK-RV64-NEXT: .LBB61_689: # %cond.load661
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15106,9 +13790,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 24
-; CHECK-RV64-NEXT: bltz a1, .LBB61_690
-; CHECK-RV64-NEXT: j .LBB61_172
+; CHECK-RV64-NEXT: j .LBB61_171
; CHECK-RV64-NEXT: .LBB61_690: # %cond.load665
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15121,9 +13803,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 23
-; CHECK-RV64-NEXT: bltz a1, .LBB61_691
-; CHECK-RV64-NEXT: j .LBB61_173
+; CHECK-RV64-NEXT: j .LBB61_172
; CHECK-RV64-NEXT: .LBB61_691: # %cond.load669
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15136,9 +13816,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 22
-; CHECK-RV64-NEXT: bltz a1, .LBB61_692
-; CHECK-RV64-NEXT: j .LBB61_174
+; CHECK-RV64-NEXT: j .LBB61_173
; CHECK-RV64-NEXT: .LBB61_692: # %cond.load673
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15151,9 +13829,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 21
-; CHECK-RV64-NEXT: bltz a1, .LBB61_693
-; CHECK-RV64-NEXT: j .LBB61_175
+; CHECK-RV64-NEXT: j .LBB61_174
; CHECK-RV64-NEXT: .LBB61_693: # %cond.load677
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15166,9 +13842,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 20
-; CHECK-RV64-NEXT: bltz a1, .LBB61_694
-; CHECK-RV64-NEXT: j .LBB61_176
+; CHECK-RV64-NEXT: j .LBB61_175
; CHECK-RV64-NEXT: .LBB61_694: # %cond.load681
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15181,9 +13855,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 19
-; CHECK-RV64-NEXT: bltz a1, .LBB61_695
-; CHECK-RV64-NEXT: j .LBB61_177
+; CHECK-RV64-NEXT: j .LBB61_176
; CHECK-RV64-NEXT: .LBB61_695: # %cond.load685
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15196,9 +13868,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 18
-; CHECK-RV64-NEXT: bltz a1, .LBB61_696
-; CHECK-RV64-NEXT: j .LBB61_178
+; CHECK-RV64-NEXT: j .LBB61_177
; CHECK-RV64-NEXT: .LBB61_696: # %cond.load689
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15211,9 +13881,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 17
-; CHECK-RV64-NEXT: bltz a1, .LBB61_697
-; CHECK-RV64-NEXT: j .LBB61_179
+; CHECK-RV64-NEXT: j .LBB61_178
; CHECK-RV64-NEXT: .LBB61_697: # %cond.load693
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15226,9 +13894,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 16
-; CHECK-RV64-NEXT: bltz a1, .LBB61_698
-; CHECK-RV64-NEXT: j .LBB61_180
+; CHECK-RV64-NEXT: j .LBB61_179
; CHECK-RV64-NEXT: .LBB61_698: # %cond.load697
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15241,9 +13907,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 15
-; CHECK-RV64-NEXT: bltz a1, .LBB61_699
-; CHECK-RV64-NEXT: j .LBB61_181
+; CHECK-RV64-NEXT: j .LBB61_180
; CHECK-RV64-NEXT: .LBB61_699: # %cond.load701
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15256,9 +13920,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 14
-; CHECK-RV64-NEXT: bltz a1, .LBB61_700
-; CHECK-RV64-NEXT: j .LBB61_182
+; CHECK-RV64-NEXT: j .LBB61_181
; CHECK-RV64-NEXT: .LBB61_700: # %cond.load705
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15271,9 +13933,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 13
-; CHECK-RV64-NEXT: bltz a1, .LBB61_701
-; CHECK-RV64-NEXT: j .LBB61_183
+; CHECK-RV64-NEXT: j .LBB61_182
; CHECK-RV64-NEXT: .LBB61_701: # %cond.load709
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15286,9 +13946,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 12
-; CHECK-RV64-NEXT: bltz a1, .LBB61_702
-; CHECK-RV64-NEXT: j .LBB61_184
+; CHECK-RV64-NEXT: j .LBB61_183
; CHECK-RV64-NEXT: .LBB61_702: # %cond.load713
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15301,9 +13959,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 11
-; CHECK-RV64-NEXT: bltz a1, .LBB61_703
-; CHECK-RV64-NEXT: j .LBB61_185
+; CHECK-RV64-NEXT: j .LBB61_184
; CHECK-RV64-NEXT: .LBB61_703: # %cond.load717
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15316,9 +13972,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 10
-; CHECK-RV64-NEXT: bltz a1, .LBB61_704
-; CHECK-RV64-NEXT: j .LBB61_186
+; CHECK-RV64-NEXT: j .LBB61_185
; CHECK-RV64-NEXT: .LBB61_704: # %cond.load721
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15331,9 +13985,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 9
-; CHECK-RV64-NEXT: bltz a1, .LBB61_705
-; CHECK-RV64-NEXT: j .LBB61_187
+; CHECK-RV64-NEXT: j .LBB61_186
; CHECK-RV64-NEXT: .LBB61_705: # %cond.load725
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15346,9 +13998,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 8
-; CHECK-RV64-NEXT: bltz a1, .LBB61_706
-; CHECK-RV64-NEXT: j .LBB61_188
+; CHECK-RV64-NEXT: j .LBB61_187
; CHECK-RV64-NEXT: .LBB61_706: # %cond.load729
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15361,9 +14011,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 7
-; CHECK-RV64-NEXT: bltz a1, .LBB61_707
-; CHECK-RV64-NEXT: j .LBB61_189
+; CHECK-RV64-NEXT: j .LBB61_188
; CHECK-RV64-NEXT: .LBB61_707: # %cond.load733
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15376,9 +14024,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 6
-; CHECK-RV64-NEXT: bltz a1, .LBB61_708
-; CHECK-RV64-NEXT: j .LBB61_190
+; CHECK-RV64-NEXT: j .LBB61_189
; CHECK-RV64-NEXT: .LBB61_708: # %cond.load737
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15391,9 +14037,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 5
-; CHECK-RV64-NEXT: bltz a1, .LBB61_709
-; CHECK-RV64-NEXT: j .LBB61_191
+; CHECK-RV64-NEXT: j .LBB61_190
; CHECK-RV64-NEXT: .LBB61_709: # %cond.load741
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15406,9 +14050,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 4
-; CHECK-RV64-NEXT: bltz a1, .LBB61_710
-; CHECK-RV64-NEXT: j .LBB61_192
+; CHECK-RV64-NEXT: j .LBB61_191
; CHECK-RV64-NEXT: .LBB61_710: # %cond.load745
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15421,9 +14063,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 3
-; CHECK-RV64-NEXT: bltz a1, .LBB61_711
-; CHECK-RV64-NEXT: j .LBB61_193
+; CHECK-RV64-NEXT: j .LBB61_192
; CHECK-RV64-NEXT: .LBB61_711: # %cond.load749
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15436,11 +14076,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a1, a2, 2
-; CHECK-RV64-NEXT: bgez a1, .LBB61_1027
-; CHECK-RV64-NEXT: j .LBB61_194
-; CHECK-RV64-NEXT: .LBB61_1027: # %cond.load749
-; CHECK-RV64-NEXT: j .LBB61_195
+; CHECK-RV64-NEXT: j .LBB61_193
; CHECK-RV64-NEXT: .LBB61_712: # %cond.load761
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vmv8r.v v16, v8
@@ -15452,9 +14088,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 1
-; CHECK-RV64-NEXT: bnez a2, .LBB61_713
-; CHECK-RV64-NEXT: j .LBB61_199
+; CHECK-RV64-NEXT: j .LBB61_198
; CHECK-RV64-NEXT: .LBB61_713: # %cond.load765
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15467,9 +14101,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 2
-; CHECK-RV64-NEXT: bnez a2, .LBB61_714
-; CHECK-RV64-NEXT: j .LBB61_200
+; CHECK-RV64-NEXT: j .LBB61_199
; CHECK-RV64-NEXT: .LBB61_714: # %cond.load769
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15482,9 +14114,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 4
-; CHECK-RV64-NEXT: bnez a2, .LBB61_715
-; CHECK-RV64-NEXT: j .LBB61_201
+; CHECK-RV64-NEXT: j .LBB61_200
; CHECK-RV64-NEXT: .LBB61_715: # %cond.load773
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15497,9 +14127,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 8
-; CHECK-RV64-NEXT: bnez a2, .LBB61_716
-; CHECK-RV64-NEXT: j .LBB61_202
+; CHECK-RV64-NEXT: j .LBB61_201
; CHECK-RV64-NEXT: .LBB61_716: # %cond.load777
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15512,9 +14140,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 16
-; CHECK-RV64-NEXT: bnez a2, .LBB61_717
-; CHECK-RV64-NEXT: j .LBB61_203
+; CHECK-RV64-NEXT: j .LBB61_202
; CHECK-RV64-NEXT: .LBB61_717: # %cond.load781
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15527,9 +14153,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 32
-; CHECK-RV64-NEXT: bnez a2, .LBB61_718
-; CHECK-RV64-NEXT: j .LBB61_204
+; CHECK-RV64-NEXT: j .LBB61_203
; CHECK-RV64-NEXT: .LBB61_718: # %cond.load785
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15542,9 +14166,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 64
-; CHECK-RV64-NEXT: bnez a2, .LBB61_719
-; CHECK-RV64-NEXT: j .LBB61_205
+; CHECK-RV64-NEXT: j .LBB61_204
; CHECK-RV64-NEXT: .LBB61_719: # %cond.load789
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15557,9 +14179,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 128
-; CHECK-RV64-NEXT: bnez a2, .LBB61_720
-; CHECK-RV64-NEXT: j .LBB61_206
+; CHECK-RV64-NEXT: j .LBB61_205
; CHECK-RV64-NEXT: .LBB61_720: # %cond.load793
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15572,9 +14192,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 256
-; CHECK-RV64-NEXT: bnez a2, .LBB61_721
-; CHECK-RV64-NEXT: j .LBB61_207
+; CHECK-RV64-NEXT: j .LBB61_206
; CHECK-RV64-NEXT: .LBB61_721: # %cond.load797
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15587,9 +14205,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 512
-; CHECK-RV64-NEXT: bnez a2, .LBB61_722
-; CHECK-RV64-NEXT: j .LBB61_208
+; CHECK-RV64-NEXT: j .LBB61_207
; CHECK-RV64-NEXT: .LBB61_722: # %cond.load801
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15602,9 +14218,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a2, a1, 1024
-; CHECK-RV64-NEXT: bnez a2, .LBB61_723
-; CHECK-RV64-NEXT: j .LBB61_209
+; CHECK-RV64-NEXT: j .LBB61_208
; CHECK-RV64-NEXT: .LBB61_723: # %cond.load805
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15617,9 +14231,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 52
-; CHECK-RV64-NEXT: bltz a2, .LBB61_724
-; CHECK-RV64-NEXT: j .LBB61_210
+; CHECK-RV64-NEXT: j .LBB61_209
; CHECK-RV64-NEXT: .LBB61_724: # %cond.load809
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15632,9 +14244,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 51
-; CHECK-RV64-NEXT: bltz a2, .LBB61_725
-; CHECK-RV64-NEXT: j .LBB61_211
+; CHECK-RV64-NEXT: j .LBB61_210
; CHECK-RV64-NEXT: .LBB61_725: # %cond.load813
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15647,9 +14257,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 50
-; CHECK-RV64-NEXT: bltz a2, .LBB61_726
-; CHECK-RV64-NEXT: j .LBB61_212
+; CHECK-RV64-NEXT: j .LBB61_211
; CHECK-RV64-NEXT: .LBB61_726: # %cond.load817
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15662,9 +14270,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 49
-; CHECK-RV64-NEXT: bltz a2, .LBB61_727
-; CHECK-RV64-NEXT: j .LBB61_213
+; CHECK-RV64-NEXT: j .LBB61_212
; CHECK-RV64-NEXT: .LBB61_727: # %cond.load821
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15677,9 +14283,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 48
-; CHECK-RV64-NEXT: bltz a2, .LBB61_728
-; CHECK-RV64-NEXT: j .LBB61_214
+; CHECK-RV64-NEXT: j .LBB61_213
; CHECK-RV64-NEXT: .LBB61_728: # %cond.load825
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15692,9 +14296,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 47
-; CHECK-RV64-NEXT: bltz a2, .LBB61_729
-; CHECK-RV64-NEXT: j .LBB61_215
+; CHECK-RV64-NEXT: j .LBB61_214
; CHECK-RV64-NEXT: .LBB61_729: # %cond.load829
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15707,9 +14309,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 46
-; CHECK-RV64-NEXT: bltz a2, .LBB61_730
-; CHECK-RV64-NEXT: j .LBB61_216
+; CHECK-RV64-NEXT: j .LBB61_215
; CHECK-RV64-NEXT: .LBB61_730: # %cond.load833
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15722,9 +14322,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 45
-; CHECK-RV64-NEXT: bltz a2, .LBB61_731
-; CHECK-RV64-NEXT: j .LBB61_217
+; CHECK-RV64-NEXT: j .LBB61_216
; CHECK-RV64-NEXT: .LBB61_731: # %cond.load837
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15737,9 +14335,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 44
-; CHECK-RV64-NEXT: bltz a2, .LBB61_732
-; CHECK-RV64-NEXT: j .LBB61_218
+; CHECK-RV64-NEXT: j .LBB61_217
; CHECK-RV64-NEXT: .LBB61_732: # %cond.load841
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15752,9 +14348,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 43
-; CHECK-RV64-NEXT: bltz a2, .LBB61_733
-; CHECK-RV64-NEXT: j .LBB61_219
+; CHECK-RV64-NEXT: j .LBB61_218
; CHECK-RV64-NEXT: .LBB61_733: # %cond.load845
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15767,9 +14361,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 42
-; CHECK-RV64-NEXT: bltz a2, .LBB61_734
-; CHECK-RV64-NEXT: j .LBB61_220
+; CHECK-RV64-NEXT: j .LBB61_219
; CHECK-RV64-NEXT: .LBB61_734: # %cond.load849
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15782,9 +14374,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 41
-; CHECK-RV64-NEXT: bltz a2, .LBB61_735
-; CHECK-RV64-NEXT: j .LBB61_221
+; CHECK-RV64-NEXT: j .LBB61_220
; CHECK-RV64-NEXT: .LBB61_735: # %cond.load853
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15797,9 +14387,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 40
-; CHECK-RV64-NEXT: bltz a2, .LBB61_736
-; CHECK-RV64-NEXT: j .LBB61_222
+; CHECK-RV64-NEXT: j .LBB61_221
; CHECK-RV64-NEXT: .LBB61_736: # %cond.load857
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15812,9 +14400,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 39
-; CHECK-RV64-NEXT: bltz a2, .LBB61_737
-; CHECK-RV64-NEXT: j .LBB61_223
+; CHECK-RV64-NEXT: j .LBB61_222
; CHECK-RV64-NEXT: .LBB61_737: # %cond.load861
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15827,9 +14413,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 38
-; CHECK-RV64-NEXT: bltz a2, .LBB61_738
-; CHECK-RV64-NEXT: j .LBB61_224
+; CHECK-RV64-NEXT: j .LBB61_223
; CHECK-RV64-NEXT: .LBB61_738: # %cond.load865
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15842,9 +14426,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 37
-; CHECK-RV64-NEXT: bltz a2, .LBB61_739
-; CHECK-RV64-NEXT: j .LBB61_225
+; CHECK-RV64-NEXT: j .LBB61_224
; CHECK-RV64-NEXT: .LBB61_739: # %cond.load869
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15857,9 +14439,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 36
-; CHECK-RV64-NEXT: bltz a2, .LBB61_740
-; CHECK-RV64-NEXT: j .LBB61_226
+; CHECK-RV64-NEXT: j .LBB61_225
; CHECK-RV64-NEXT: .LBB61_740: # %cond.load873
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15872,9 +14452,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 35
-; CHECK-RV64-NEXT: bltz a2, .LBB61_741
-; CHECK-RV64-NEXT: j .LBB61_227
+; CHECK-RV64-NEXT: j .LBB61_226
; CHECK-RV64-NEXT: .LBB61_741: # %cond.load877
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15887,9 +14465,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 34
-; CHECK-RV64-NEXT: bltz a2, .LBB61_742
-; CHECK-RV64-NEXT: j .LBB61_228
+; CHECK-RV64-NEXT: j .LBB61_227
; CHECK-RV64-NEXT: .LBB61_742: # %cond.load881
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15902,9 +14478,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 33
-; CHECK-RV64-NEXT: bltz a2, .LBB61_743
-; CHECK-RV64-NEXT: j .LBB61_229
+; CHECK-RV64-NEXT: j .LBB61_228
; CHECK-RV64-NEXT: .LBB61_743: # %cond.load885
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15917,9 +14491,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 32
-; CHECK-RV64-NEXT: bltz a2, .LBB61_744
-; CHECK-RV64-NEXT: j .LBB61_230
+; CHECK-RV64-NEXT: j .LBB61_229
; CHECK-RV64-NEXT: .LBB61_744: # %cond.load889
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15932,9 +14504,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 31
-; CHECK-RV64-NEXT: bltz a2, .LBB61_745
-; CHECK-RV64-NEXT: j .LBB61_231
+; CHECK-RV64-NEXT: j .LBB61_230
; CHECK-RV64-NEXT: .LBB61_745: # %cond.load893
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15947,9 +14517,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 30
-; CHECK-RV64-NEXT: bltz a2, .LBB61_746
-; CHECK-RV64-NEXT: j .LBB61_232
+; CHECK-RV64-NEXT: j .LBB61_231
; CHECK-RV64-NEXT: .LBB61_746: # %cond.load897
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15962,9 +14530,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 29
-; CHECK-RV64-NEXT: bltz a2, .LBB61_747
-; CHECK-RV64-NEXT: j .LBB61_233
+; CHECK-RV64-NEXT: j .LBB61_232
; CHECK-RV64-NEXT: .LBB61_747: # %cond.load901
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15977,9 +14543,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 28
-; CHECK-RV64-NEXT: bltz a2, .LBB61_748
-; CHECK-RV64-NEXT: j .LBB61_234
+; CHECK-RV64-NEXT: j .LBB61_233
; CHECK-RV64-NEXT: .LBB61_748: # %cond.load905
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -15992,9 +14556,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 27
-; CHECK-RV64-NEXT: bltz a2, .LBB61_749
-; CHECK-RV64-NEXT: j .LBB61_235
+; CHECK-RV64-NEXT: j .LBB61_234
; CHECK-RV64-NEXT: .LBB61_749: # %cond.load909
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16007,9 +14569,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 26
-; CHECK-RV64-NEXT: bltz a2, .LBB61_750
-; CHECK-RV64-NEXT: j .LBB61_236
+; CHECK-RV64-NEXT: j .LBB61_235
; CHECK-RV64-NEXT: .LBB61_750: # %cond.load913
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16022,9 +14582,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 25
-; CHECK-RV64-NEXT: bltz a2, .LBB61_751
-; CHECK-RV64-NEXT: j .LBB61_237
+; CHECK-RV64-NEXT: j .LBB61_236
; CHECK-RV64-NEXT: .LBB61_751: # %cond.load917
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16037,9 +14595,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 24
-; CHECK-RV64-NEXT: bltz a2, .LBB61_752
-; CHECK-RV64-NEXT: j .LBB61_238
+; CHECK-RV64-NEXT: j .LBB61_237
; CHECK-RV64-NEXT: .LBB61_752: # %cond.load921
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16052,9 +14608,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 23
-; CHECK-RV64-NEXT: bltz a2, .LBB61_753
-; CHECK-RV64-NEXT: j .LBB61_239
+; CHECK-RV64-NEXT: j .LBB61_238
; CHECK-RV64-NEXT: .LBB61_753: # %cond.load925
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16067,9 +14621,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 22
-; CHECK-RV64-NEXT: bltz a2, .LBB61_754
-; CHECK-RV64-NEXT: j .LBB61_240
+; CHECK-RV64-NEXT: j .LBB61_239
; CHECK-RV64-NEXT: .LBB61_754: # %cond.load929
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16082,9 +14634,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 21
-; CHECK-RV64-NEXT: bltz a2, .LBB61_755
-; CHECK-RV64-NEXT: j .LBB61_241
+; CHECK-RV64-NEXT: j .LBB61_240
; CHECK-RV64-NEXT: .LBB61_755: # %cond.load933
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16097,9 +14647,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 20
-; CHECK-RV64-NEXT: bltz a2, .LBB61_756
-; CHECK-RV64-NEXT: j .LBB61_242
+; CHECK-RV64-NEXT: j .LBB61_241
; CHECK-RV64-NEXT: .LBB61_756: # %cond.load937
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16112,9 +14660,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 19
-; CHECK-RV64-NEXT: bltz a2, .LBB61_757
-; CHECK-RV64-NEXT: j .LBB61_243
+; CHECK-RV64-NEXT: j .LBB61_242
; CHECK-RV64-NEXT: .LBB61_757: # %cond.load941
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16127,9 +14673,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 18
-; CHECK-RV64-NEXT: bltz a2, .LBB61_758
-; CHECK-RV64-NEXT: j .LBB61_244
+; CHECK-RV64-NEXT: j .LBB61_243
; CHECK-RV64-NEXT: .LBB61_758: # %cond.load945
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16142,9 +14686,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 17
-; CHECK-RV64-NEXT: bltz a2, .LBB61_759
-; CHECK-RV64-NEXT: j .LBB61_245
+; CHECK-RV64-NEXT: j .LBB61_244
; CHECK-RV64-NEXT: .LBB61_759: # %cond.load949
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16157,9 +14699,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 16
-; CHECK-RV64-NEXT: bltz a2, .LBB61_760
-; CHECK-RV64-NEXT: j .LBB61_246
+; CHECK-RV64-NEXT: j .LBB61_245
; CHECK-RV64-NEXT: .LBB61_760: # %cond.load953
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16172,9 +14712,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 15
-; CHECK-RV64-NEXT: bltz a2, .LBB61_761
-; CHECK-RV64-NEXT: j .LBB61_247
+; CHECK-RV64-NEXT: j .LBB61_246
; CHECK-RV64-NEXT: .LBB61_761: # %cond.load957
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16187,9 +14725,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 14
-; CHECK-RV64-NEXT: bltz a2, .LBB61_762
-; CHECK-RV64-NEXT: j .LBB61_248
+; CHECK-RV64-NEXT: j .LBB61_247
; CHECK-RV64-NEXT: .LBB61_762: # %cond.load961
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16202,9 +14738,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 13
-; CHECK-RV64-NEXT: bltz a2, .LBB61_763
-; CHECK-RV64-NEXT: j .LBB61_249
+; CHECK-RV64-NEXT: j .LBB61_248
; CHECK-RV64-NEXT: .LBB61_763: # %cond.load965
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16217,9 +14751,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 12
-; CHECK-RV64-NEXT: bltz a2, .LBB61_764
-; CHECK-RV64-NEXT: j .LBB61_250
+; CHECK-RV64-NEXT: j .LBB61_249
; CHECK-RV64-NEXT: .LBB61_764: # %cond.load969
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16232,9 +14764,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 11
-; CHECK-RV64-NEXT: bltz a2, .LBB61_765
-; CHECK-RV64-NEXT: j .LBB61_251
+; CHECK-RV64-NEXT: j .LBB61_250
; CHECK-RV64-NEXT: .LBB61_765: # %cond.load973
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16247,9 +14777,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 10
-; CHECK-RV64-NEXT: bltz a2, .LBB61_766
-; CHECK-RV64-NEXT: j .LBB61_252
+; CHECK-RV64-NEXT: j .LBB61_251
; CHECK-RV64-NEXT: .LBB61_766: # %cond.load977
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16262,9 +14790,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 9
-; CHECK-RV64-NEXT: bltz a2, .LBB61_767
-; CHECK-RV64-NEXT: j .LBB61_253
+; CHECK-RV64-NEXT: j .LBB61_252
; CHECK-RV64-NEXT: .LBB61_767: # %cond.load981
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16277,9 +14803,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 8
-; CHECK-RV64-NEXT: bltz a2, .LBB61_768
-; CHECK-RV64-NEXT: j .LBB61_254
+; CHECK-RV64-NEXT: j .LBB61_253
; CHECK-RV64-NEXT: .LBB61_768: # %cond.load985
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16292,9 +14816,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 7
-; CHECK-RV64-NEXT: bltz a2, .LBB61_769
-; CHECK-RV64-NEXT: j .LBB61_255
+; CHECK-RV64-NEXT: j .LBB61_254
; CHECK-RV64-NEXT: .LBB61_769: # %cond.load989
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16307,9 +14829,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 6
-; CHECK-RV64-NEXT: bltz a2, .LBB61_770
-; CHECK-RV64-NEXT: j .LBB61_256
+; CHECK-RV64-NEXT: j .LBB61_255
; CHECK-RV64-NEXT: .LBB61_770: # %cond.load993
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16322,9 +14842,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 5
-; CHECK-RV64-NEXT: bltz a2, .LBB61_771
-; CHECK-RV64-NEXT: j .LBB61_257
+; CHECK-RV64-NEXT: j .LBB61_256
; CHECK-RV64-NEXT: .LBB61_771: # %cond.load997
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16337,9 +14855,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 4
-; CHECK-RV64-NEXT: bltz a2, .LBB61_772
-; CHECK-RV64-NEXT: j .LBB61_258
+; CHECK-RV64-NEXT: j .LBB61_257
; CHECK-RV64-NEXT: .LBB61_772: # %cond.load1001
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16352,9 +14868,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 3
-; CHECK-RV64-NEXT: bltz a2, .LBB61_773
-; CHECK-RV64-NEXT: j .LBB61_259
+; CHECK-RV64-NEXT: j .LBB61_258
; CHECK-RV64-NEXT: .LBB61_773: # %cond.load1005
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16367,11 +14881,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: slli a2, a1, 2
-; CHECK-RV64-NEXT: bgez a2, .LBB61_1028
-; CHECK-RV64-NEXT: j .LBB61_260
-; CHECK-RV64-NEXT: .LBB61_1028: # %cond.load1005
-; CHECK-RV64-NEXT: j .LBB61_261
+; CHECK-RV64-NEXT: j .LBB61_259
; CHECK-RV64-NEXT: .LBB61_774: # %cond.load1017
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vmv8r.v v16, v8
@@ -16383,9 +14893,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: andi a1, a2, 1
-; CHECK-RV64-NEXT: bnez a1, .LBB61_775
-; CHECK-RV64-NEXT: j .LBB61_265
+; CHECK-RV64-NEXT: j .LBB61_264
; CHECK-RV64-NEXT: .LBB61_775: # %cond.load1021
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16396,9 +14904,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 2
-; CHECK-RV64-NEXT: bnez a1, .LBB61_776
-; CHECK-RV64-NEXT: j .LBB61_266
+; CHECK-RV64-NEXT: j .LBB61_265
; CHECK-RV64-NEXT: .LBB61_776: # %cond.load1025
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16409,9 +14915,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 4
-; CHECK-RV64-NEXT: bnez a1, .LBB61_777
-; CHECK-RV64-NEXT: j .LBB61_267
+; CHECK-RV64-NEXT: j .LBB61_266
; CHECK-RV64-NEXT: .LBB61_777: # %cond.load1029
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16422,9 +14926,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 8
-; CHECK-RV64-NEXT: bnez a1, .LBB61_778
-; CHECK-RV64-NEXT: j .LBB61_268
+; CHECK-RV64-NEXT: j .LBB61_267
; CHECK-RV64-NEXT: .LBB61_778: # %cond.load1033
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16435,9 +14937,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 16
-; CHECK-RV64-NEXT: bnez a1, .LBB61_779
-; CHECK-RV64-NEXT: j .LBB61_269
+; CHECK-RV64-NEXT: j .LBB61_268
; CHECK-RV64-NEXT: .LBB61_779: # %cond.load1037
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16448,9 +14948,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 32
-; CHECK-RV64-NEXT: bnez a1, .LBB61_780
-; CHECK-RV64-NEXT: j .LBB61_270
+; CHECK-RV64-NEXT: j .LBB61_269
; CHECK-RV64-NEXT: .LBB61_780: # %cond.load1041
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16461,9 +14959,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 64
-; CHECK-RV64-NEXT: bnez a1, .LBB61_781
-; CHECK-RV64-NEXT: j .LBB61_271
+; CHECK-RV64-NEXT: j .LBB61_270
; CHECK-RV64-NEXT: .LBB61_781: # %cond.load1045
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16474,9 +14970,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 128
-; CHECK-RV64-NEXT: bnez a1, .LBB61_782
-; CHECK-RV64-NEXT: j .LBB61_272
+; CHECK-RV64-NEXT: j .LBB61_271
; CHECK-RV64-NEXT: .LBB61_782: # %cond.load1049
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16487,9 +14981,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 256
-; CHECK-RV64-NEXT: bnez a1, .LBB61_783
-; CHECK-RV64-NEXT: j .LBB61_273
+; CHECK-RV64-NEXT: j .LBB61_272
; CHECK-RV64-NEXT: .LBB61_783: # %cond.load1053
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16500,9 +14992,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 512
-; CHECK-RV64-NEXT: bnez a1, .LBB61_784
-; CHECK-RV64-NEXT: j .LBB61_274
+; CHECK-RV64-NEXT: j .LBB61_273
; CHECK-RV64-NEXT: .LBB61_784: # %cond.load1057
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16513,9 +15003,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 1024
-; CHECK-RV64-NEXT: bnez a1, .LBB61_785
-; CHECK-RV64-NEXT: j .LBB61_275
+; CHECK-RV64-NEXT: j .LBB61_274
; CHECK-RV64-NEXT: .LBB61_785: # %cond.load1061
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16526,9 +15014,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 52
-; CHECK-RV64-NEXT: bltz a1, .LBB61_786
-; CHECK-RV64-NEXT: j .LBB61_276
+; CHECK-RV64-NEXT: j .LBB61_275
; CHECK-RV64-NEXT: .LBB61_786: # %cond.load1065
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16539,9 +15025,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 51
-; CHECK-RV64-NEXT: bltz a1, .LBB61_787
-; CHECK-RV64-NEXT: j .LBB61_277
+; CHECK-RV64-NEXT: j .LBB61_276
; CHECK-RV64-NEXT: .LBB61_787: # %cond.load1069
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16552,9 +15036,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 50
-; CHECK-RV64-NEXT: bltz a1, .LBB61_788
-; CHECK-RV64-NEXT: j .LBB61_278
+; CHECK-RV64-NEXT: j .LBB61_277
; CHECK-RV64-NEXT: .LBB61_788: # %cond.load1073
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16565,9 +15047,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 49
-; CHECK-RV64-NEXT: bltz a1, .LBB61_789
-; CHECK-RV64-NEXT: j .LBB61_279
+; CHECK-RV64-NEXT: j .LBB61_278
; CHECK-RV64-NEXT: .LBB61_789: # %cond.load1077
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16578,9 +15058,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 48
-; CHECK-RV64-NEXT: bltz a1, .LBB61_790
-; CHECK-RV64-NEXT: j .LBB61_280
+; CHECK-RV64-NEXT: j .LBB61_279
; CHECK-RV64-NEXT: .LBB61_790: # %cond.load1081
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16591,9 +15069,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 47
-; CHECK-RV64-NEXT: bltz a1, .LBB61_791
-; CHECK-RV64-NEXT: j .LBB61_281
+; CHECK-RV64-NEXT: j .LBB61_280
; CHECK-RV64-NEXT: .LBB61_791: # %cond.load1085
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16604,9 +15080,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 46
-; CHECK-RV64-NEXT: bltz a1, .LBB61_792
-; CHECK-RV64-NEXT: j .LBB61_282
+; CHECK-RV64-NEXT: j .LBB61_281
; CHECK-RV64-NEXT: .LBB61_792: # %cond.load1089
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16617,9 +15091,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 45
-; CHECK-RV64-NEXT: bltz a1, .LBB61_793
-; CHECK-RV64-NEXT: j .LBB61_283
+; CHECK-RV64-NEXT: j .LBB61_282
; CHECK-RV64-NEXT: .LBB61_793: # %cond.load1093
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16630,9 +15102,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 44
-; CHECK-RV64-NEXT: bltz a1, .LBB61_794
-; CHECK-RV64-NEXT: j .LBB61_284
+; CHECK-RV64-NEXT: j .LBB61_283
; CHECK-RV64-NEXT: .LBB61_794: # %cond.load1097
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16643,9 +15113,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 43
-; CHECK-RV64-NEXT: bltz a1, .LBB61_795
-; CHECK-RV64-NEXT: j .LBB61_285
+; CHECK-RV64-NEXT: j .LBB61_284
; CHECK-RV64-NEXT: .LBB61_795: # %cond.load1101
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16656,9 +15124,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 42
-; CHECK-RV64-NEXT: bltz a1, .LBB61_796
-; CHECK-RV64-NEXT: j .LBB61_286
+; CHECK-RV64-NEXT: j .LBB61_285
; CHECK-RV64-NEXT: .LBB61_796: # %cond.load1105
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16669,9 +15135,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 41
-; CHECK-RV64-NEXT: bltz a1, .LBB61_797
-; CHECK-RV64-NEXT: j .LBB61_287
+; CHECK-RV64-NEXT: j .LBB61_286
; CHECK-RV64-NEXT: .LBB61_797: # %cond.load1109
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16682,9 +15146,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 40
-; CHECK-RV64-NEXT: bltz a1, .LBB61_798
-; CHECK-RV64-NEXT: j .LBB61_288
+; CHECK-RV64-NEXT: j .LBB61_287
; CHECK-RV64-NEXT: .LBB61_798: # %cond.load1113
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16695,9 +15157,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 39
-; CHECK-RV64-NEXT: bltz a1, .LBB61_799
-; CHECK-RV64-NEXT: j .LBB61_289
+; CHECK-RV64-NEXT: j .LBB61_288
; CHECK-RV64-NEXT: .LBB61_799: # %cond.load1117
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16708,9 +15168,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 38
-; CHECK-RV64-NEXT: bltz a1, .LBB61_800
-; CHECK-RV64-NEXT: j .LBB61_290
+; CHECK-RV64-NEXT: j .LBB61_289
; CHECK-RV64-NEXT: .LBB61_800: # %cond.load1121
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16721,9 +15179,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 37
-; CHECK-RV64-NEXT: bltz a1, .LBB61_801
-; CHECK-RV64-NEXT: j .LBB61_291
+; CHECK-RV64-NEXT: j .LBB61_290
; CHECK-RV64-NEXT: .LBB61_801: # %cond.load1125
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16734,9 +15190,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 36
-; CHECK-RV64-NEXT: bltz a1, .LBB61_802
-; CHECK-RV64-NEXT: j .LBB61_292
+; CHECK-RV64-NEXT: j .LBB61_291
; CHECK-RV64-NEXT: .LBB61_802: # %cond.load1129
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16747,9 +15201,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 35
-; CHECK-RV64-NEXT: bltz a1, .LBB61_803
-; CHECK-RV64-NEXT: j .LBB61_293
+; CHECK-RV64-NEXT: j .LBB61_292
; CHECK-RV64-NEXT: .LBB61_803: # %cond.load1133
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16760,9 +15212,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 34
-; CHECK-RV64-NEXT: bltz a1, .LBB61_804
-; CHECK-RV64-NEXT: j .LBB61_294
+; CHECK-RV64-NEXT: j .LBB61_293
; CHECK-RV64-NEXT: .LBB61_804: # %cond.load1137
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16773,9 +15223,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 33
-; CHECK-RV64-NEXT: bltz a1, .LBB61_805
-; CHECK-RV64-NEXT: j .LBB61_295
+; CHECK-RV64-NEXT: j .LBB61_294
; CHECK-RV64-NEXT: .LBB61_805: # %cond.load1141
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16786,9 +15234,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 32
-; CHECK-RV64-NEXT: bltz a1, .LBB61_806
-; CHECK-RV64-NEXT: j .LBB61_296
+; CHECK-RV64-NEXT: j .LBB61_295
; CHECK-RV64-NEXT: .LBB61_806: # %cond.load1145
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16799,9 +15245,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 31
-; CHECK-RV64-NEXT: bltz a1, .LBB61_807
-; CHECK-RV64-NEXT: j .LBB61_297
+; CHECK-RV64-NEXT: j .LBB61_296
; CHECK-RV64-NEXT: .LBB61_807: # %cond.load1149
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16812,9 +15256,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 30
-; CHECK-RV64-NEXT: bltz a1, .LBB61_808
-; CHECK-RV64-NEXT: j .LBB61_298
+; CHECK-RV64-NEXT: j .LBB61_297
; CHECK-RV64-NEXT: .LBB61_808: # %cond.load1153
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16825,9 +15267,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 29
-; CHECK-RV64-NEXT: bltz a1, .LBB61_809
-; CHECK-RV64-NEXT: j .LBB61_299
+; CHECK-RV64-NEXT: j .LBB61_298
; CHECK-RV64-NEXT: .LBB61_809: # %cond.load1157
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16838,9 +15278,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 28
-; CHECK-RV64-NEXT: bltz a1, .LBB61_810
-; CHECK-RV64-NEXT: j .LBB61_300
+; CHECK-RV64-NEXT: j .LBB61_299
; CHECK-RV64-NEXT: .LBB61_810: # %cond.load1161
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16851,9 +15289,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 27
-; CHECK-RV64-NEXT: bltz a1, .LBB61_811
-; CHECK-RV64-NEXT: j .LBB61_301
+; CHECK-RV64-NEXT: j .LBB61_300
; CHECK-RV64-NEXT: .LBB61_811: # %cond.load1165
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16864,9 +15300,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 26
-; CHECK-RV64-NEXT: bltz a1, .LBB61_812
-; CHECK-RV64-NEXT: j .LBB61_302
+; CHECK-RV64-NEXT: j .LBB61_301
; CHECK-RV64-NEXT: .LBB61_812: # %cond.load1169
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16877,9 +15311,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 25
-; CHECK-RV64-NEXT: bltz a1, .LBB61_813
-; CHECK-RV64-NEXT: j .LBB61_303
+; CHECK-RV64-NEXT: j .LBB61_302
; CHECK-RV64-NEXT: .LBB61_813: # %cond.load1173
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16890,9 +15322,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 24
-; CHECK-RV64-NEXT: bltz a1, .LBB61_814
-; CHECK-RV64-NEXT: j .LBB61_304
+; CHECK-RV64-NEXT: j .LBB61_303
; CHECK-RV64-NEXT: .LBB61_814: # %cond.load1177
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16903,9 +15333,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 23
-; CHECK-RV64-NEXT: bltz a1, .LBB61_815
-; CHECK-RV64-NEXT: j .LBB61_305
+; CHECK-RV64-NEXT: j .LBB61_304
; CHECK-RV64-NEXT: .LBB61_815: # %cond.load1181
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16916,9 +15344,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 22
-; CHECK-RV64-NEXT: bltz a1, .LBB61_816
-; CHECK-RV64-NEXT: j .LBB61_306
+; CHECK-RV64-NEXT: j .LBB61_305
; CHECK-RV64-NEXT: .LBB61_816: # %cond.load1185
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16929,9 +15355,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 21
-; CHECK-RV64-NEXT: bltz a1, .LBB61_817
-; CHECK-RV64-NEXT: j .LBB61_307
+; CHECK-RV64-NEXT: j .LBB61_306
; CHECK-RV64-NEXT: .LBB61_817: # %cond.load1189
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16942,9 +15366,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 20
-; CHECK-RV64-NEXT: bltz a1, .LBB61_818
-; CHECK-RV64-NEXT: j .LBB61_308
+; CHECK-RV64-NEXT: j .LBB61_307
; CHECK-RV64-NEXT: .LBB61_818: # %cond.load1193
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16955,9 +15377,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 19
-; CHECK-RV64-NEXT: bltz a1, .LBB61_819
-; CHECK-RV64-NEXT: j .LBB61_309
+; CHECK-RV64-NEXT: j .LBB61_308
; CHECK-RV64-NEXT: .LBB61_819: # %cond.load1197
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16968,9 +15388,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 18
-; CHECK-RV64-NEXT: bltz a1, .LBB61_820
-; CHECK-RV64-NEXT: j .LBB61_310
+; CHECK-RV64-NEXT: j .LBB61_309
; CHECK-RV64-NEXT: .LBB61_820: # %cond.load1201
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16981,9 +15399,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 17
-; CHECK-RV64-NEXT: bltz a1, .LBB61_821
-; CHECK-RV64-NEXT: j .LBB61_311
+; CHECK-RV64-NEXT: j .LBB61_310
; CHECK-RV64-NEXT: .LBB61_821: # %cond.load1205
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16994,9 +15410,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 16
-; CHECK-RV64-NEXT: bltz a1, .LBB61_822
-; CHECK-RV64-NEXT: j .LBB61_312
+; CHECK-RV64-NEXT: j .LBB61_311
; CHECK-RV64-NEXT: .LBB61_822: # %cond.load1209
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17007,9 +15421,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 15
-; CHECK-RV64-NEXT: bltz a1, .LBB61_823
-; CHECK-RV64-NEXT: j .LBB61_313
+; CHECK-RV64-NEXT: j .LBB61_312
; CHECK-RV64-NEXT: .LBB61_823: # %cond.load1213
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17020,9 +15432,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 14
-; CHECK-RV64-NEXT: bltz a1, .LBB61_824
-; CHECK-RV64-NEXT: j .LBB61_314
+; CHECK-RV64-NEXT: j .LBB61_313
; CHECK-RV64-NEXT: .LBB61_824: # %cond.load1217
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17033,9 +15443,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 13
-; CHECK-RV64-NEXT: bltz a1, .LBB61_825
-; CHECK-RV64-NEXT: j .LBB61_315
+; CHECK-RV64-NEXT: j .LBB61_314
; CHECK-RV64-NEXT: .LBB61_825: # %cond.load1221
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17046,9 +15454,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 12
-; CHECK-RV64-NEXT: bltz a1, .LBB61_826
-; CHECK-RV64-NEXT: j .LBB61_316
+; CHECK-RV64-NEXT: j .LBB61_315
; CHECK-RV64-NEXT: .LBB61_826: # %cond.load1225
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17059,9 +15465,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 11
-; CHECK-RV64-NEXT: bltz a1, .LBB61_827
-; CHECK-RV64-NEXT: j .LBB61_317
+; CHECK-RV64-NEXT: j .LBB61_316
; CHECK-RV64-NEXT: .LBB61_827: # %cond.load1229
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17072,9 +15476,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 10
-; CHECK-RV64-NEXT: bltz a1, .LBB61_828
-; CHECK-RV64-NEXT: j .LBB61_318
+; CHECK-RV64-NEXT: j .LBB61_317
; CHECK-RV64-NEXT: .LBB61_828: # %cond.load1233
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17085,9 +15487,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 9
-; CHECK-RV64-NEXT: bltz a1, .LBB61_829
-; CHECK-RV64-NEXT: j .LBB61_319
+; CHECK-RV64-NEXT: j .LBB61_318
; CHECK-RV64-NEXT: .LBB61_829: # %cond.load1237
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17098,9 +15498,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 8
-; CHECK-RV64-NEXT: bltz a1, .LBB61_830
-; CHECK-RV64-NEXT: j .LBB61_320
+; CHECK-RV64-NEXT: j .LBB61_319
; CHECK-RV64-NEXT: .LBB61_830: # %cond.load1241
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17111,9 +15509,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 7
-; CHECK-RV64-NEXT: bltz a1, .LBB61_831
-; CHECK-RV64-NEXT: j .LBB61_321
+; CHECK-RV64-NEXT: j .LBB61_320
; CHECK-RV64-NEXT: .LBB61_831: # %cond.load1245
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17124,9 +15520,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 6
-; CHECK-RV64-NEXT: bltz a1, .LBB61_832
-; CHECK-RV64-NEXT: j .LBB61_322
+; CHECK-RV64-NEXT: j .LBB61_321
; CHECK-RV64-NEXT: .LBB61_832: # %cond.load1249
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17137,9 +15531,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 5
-; CHECK-RV64-NEXT: bltz a1, .LBB61_833
-; CHECK-RV64-NEXT: j .LBB61_323
+; CHECK-RV64-NEXT: j .LBB61_322
; CHECK-RV64-NEXT: .LBB61_833: # %cond.load1253
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17150,9 +15542,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 4
-; CHECK-RV64-NEXT: bltz a1, .LBB61_834
-; CHECK-RV64-NEXT: j .LBB61_324
+; CHECK-RV64-NEXT: j .LBB61_323
; CHECK-RV64-NEXT: .LBB61_834: # %cond.load1257
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17163,9 +15553,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 3
-; CHECK-RV64-NEXT: bltz a1, .LBB61_835
-; CHECK-RV64-NEXT: j .LBB61_325
+; CHECK-RV64-NEXT: j .LBB61_324
; CHECK-RV64-NEXT: .LBB61_835: # %cond.load1261
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17176,11 +15564,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 2
-; CHECK-RV64-NEXT: bgez a1, .LBB61_1029
-; CHECK-RV64-NEXT: j .LBB61_326
-; CHECK-RV64-NEXT: .LBB61_1029: # %cond.load1261
-; CHECK-RV64-NEXT: j .LBB61_327
+; CHECK-RV64-NEXT: j .LBB61_325
; CHECK-RV64-NEXT: .LBB61_836: # %cond.load1273
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vmv.s.x v16, a2
@@ -17189,9 +15573,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 1
-; CHECK-RV64-NEXT: bnez a2, .LBB61_837
-; CHECK-RV64-NEXT: j .LBB61_331
+; CHECK-RV64-NEXT: j .LBB61_330
; CHECK-RV64-NEXT: .LBB61_837: # %cond.load1277
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17202,9 +15584,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 2
-; CHECK-RV64-NEXT: bnez a2, .LBB61_838
-; CHECK-RV64-NEXT: j .LBB61_332
+; CHECK-RV64-NEXT: j .LBB61_331
; CHECK-RV64-NEXT: .LBB61_838: # %cond.load1281
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17215,9 +15595,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 4
-; CHECK-RV64-NEXT: bnez a2, .LBB61_839
-; CHECK-RV64-NEXT: j .LBB61_333
+; CHECK-RV64-NEXT: j .LBB61_332
; CHECK-RV64-NEXT: .LBB61_839: # %cond.load1285
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17228,9 +15606,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 8
-; CHECK-RV64-NEXT: bnez a2, .LBB61_840
-; CHECK-RV64-NEXT: j .LBB61_334
+; CHECK-RV64-NEXT: j .LBB61_333
; CHECK-RV64-NEXT: .LBB61_840: # %cond.load1289
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17241,9 +15617,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 16
-; CHECK-RV64-NEXT: bnez a2, .LBB61_841
-; CHECK-RV64-NEXT: j .LBB61_335
+; CHECK-RV64-NEXT: j .LBB61_334
; CHECK-RV64-NEXT: .LBB61_841: # %cond.load1293
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17254,9 +15628,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 32
-; CHECK-RV64-NEXT: bnez a2, .LBB61_842
-; CHECK-RV64-NEXT: j .LBB61_336
+; CHECK-RV64-NEXT: j .LBB61_335
; CHECK-RV64-NEXT: .LBB61_842: # %cond.load1297
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17267,9 +15639,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 64
-; CHECK-RV64-NEXT: bnez a2, .LBB61_843
-; CHECK-RV64-NEXT: j .LBB61_337
+; CHECK-RV64-NEXT: j .LBB61_336
; CHECK-RV64-NEXT: .LBB61_843: # %cond.load1301
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17280,9 +15650,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 128
-; CHECK-RV64-NEXT: bnez a2, .LBB61_844
-; CHECK-RV64-NEXT: j .LBB61_338
+; CHECK-RV64-NEXT: j .LBB61_337
; CHECK-RV64-NEXT: .LBB61_844: # %cond.load1305
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17293,9 +15661,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 256
-; CHECK-RV64-NEXT: bnez a2, .LBB61_845
-; CHECK-RV64-NEXT: j .LBB61_339
+; CHECK-RV64-NEXT: j .LBB61_338
; CHECK-RV64-NEXT: .LBB61_845: # %cond.load1309
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17306,9 +15672,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 512
-; CHECK-RV64-NEXT: bnez a2, .LBB61_846
-; CHECK-RV64-NEXT: j .LBB61_340
+; CHECK-RV64-NEXT: j .LBB61_339
; CHECK-RV64-NEXT: .LBB61_846: # %cond.load1313
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17319,9 +15683,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 1024
-; CHECK-RV64-NEXT: bnez a2, .LBB61_847
-; CHECK-RV64-NEXT: j .LBB61_341
+; CHECK-RV64-NEXT: j .LBB61_340
; CHECK-RV64-NEXT: .LBB61_847: # %cond.load1317
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17332,9 +15694,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 52
-; CHECK-RV64-NEXT: bltz a2, .LBB61_848
-; CHECK-RV64-NEXT: j .LBB61_342
+; CHECK-RV64-NEXT: j .LBB61_341
; CHECK-RV64-NEXT: .LBB61_848: # %cond.load1321
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17345,9 +15705,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 51
-; CHECK-RV64-NEXT: bltz a2, .LBB61_849
-; CHECK-RV64-NEXT: j .LBB61_343
+; CHECK-RV64-NEXT: j .LBB61_342
; CHECK-RV64-NEXT: .LBB61_849: # %cond.load1325
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17358,9 +15716,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 50
-; CHECK-RV64-NEXT: bltz a2, .LBB61_850
-; CHECK-RV64-NEXT: j .LBB61_344
+; CHECK-RV64-NEXT: j .LBB61_343
; CHECK-RV64-NEXT: .LBB61_850: # %cond.load1329
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17371,9 +15727,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 49
-; CHECK-RV64-NEXT: bltz a2, .LBB61_851
-; CHECK-RV64-NEXT: j .LBB61_345
+; CHECK-RV64-NEXT: j .LBB61_344
; CHECK-RV64-NEXT: .LBB61_851: # %cond.load1333
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17384,9 +15738,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 48
-; CHECK-RV64-NEXT: bltz a2, .LBB61_852
-; CHECK-RV64-NEXT: j .LBB61_346
+; CHECK-RV64-NEXT: j .LBB61_345
; CHECK-RV64-NEXT: .LBB61_852: # %cond.load1337
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17397,9 +15749,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 47
-; CHECK-RV64-NEXT: bltz a2, .LBB61_853
-; CHECK-RV64-NEXT: j .LBB61_347
+; CHECK-RV64-NEXT: j .LBB61_346
; CHECK-RV64-NEXT: .LBB61_853: # %cond.load1341
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17410,9 +15760,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 46
-; CHECK-RV64-NEXT: bltz a2, .LBB61_854
-; CHECK-RV64-NEXT: j .LBB61_348
+; CHECK-RV64-NEXT: j .LBB61_347
; CHECK-RV64-NEXT: .LBB61_854: # %cond.load1345
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17423,9 +15771,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 45
-; CHECK-RV64-NEXT: bltz a2, .LBB61_855
-; CHECK-RV64-NEXT: j .LBB61_349
+; CHECK-RV64-NEXT: j .LBB61_348
; CHECK-RV64-NEXT: .LBB61_855: # %cond.load1349
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17436,9 +15782,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 44
-; CHECK-RV64-NEXT: bltz a2, .LBB61_856
-; CHECK-RV64-NEXT: j .LBB61_350
+; CHECK-RV64-NEXT: j .LBB61_349
; CHECK-RV64-NEXT: .LBB61_856: # %cond.load1353
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17449,9 +15793,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 43
-; CHECK-RV64-NEXT: bltz a2, .LBB61_857
-; CHECK-RV64-NEXT: j .LBB61_351
+; CHECK-RV64-NEXT: j .LBB61_350
; CHECK-RV64-NEXT: .LBB61_857: # %cond.load1357
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17462,9 +15804,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 42
-; CHECK-RV64-NEXT: bltz a2, .LBB61_858
-; CHECK-RV64-NEXT: j .LBB61_352
+; CHECK-RV64-NEXT: j .LBB61_351
; CHECK-RV64-NEXT: .LBB61_858: # %cond.load1361
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17475,9 +15815,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 41
-; CHECK-RV64-NEXT: bltz a2, .LBB61_859
-; CHECK-RV64-NEXT: j .LBB61_353
+; CHECK-RV64-NEXT: j .LBB61_352
; CHECK-RV64-NEXT: .LBB61_859: # %cond.load1365
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17488,9 +15826,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 40
-; CHECK-RV64-NEXT: bltz a2, .LBB61_860
-; CHECK-RV64-NEXT: j .LBB61_354
+; CHECK-RV64-NEXT: j .LBB61_353
; CHECK-RV64-NEXT: .LBB61_860: # %cond.load1369
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17501,9 +15837,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 39
-; CHECK-RV64-NEXT: bltz a2, .LBB61_861
-; CHECK-RV64-NEXT: j .LBB61_355
+; CHECK-RV64-NEXT: j .LBB61_354
; CHECK-RV64-NEXT: .LBB61_861: # %cond.load1373
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17514,9 +15848,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 38
-; CHECK-RV64-NEXT: bltz a2, .LBB61_862
-; CHECK-RV64-NEXT: j .LBB61_356
+; CHECK-RV64-NEXT: j .LBB61_355
; CHECK-RV64-NEXT: .LBB61_862: # %cond.load1377
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17527,9 +15859,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 37
-; CHECK-RV64-NEXT: bltz a2, .LBB61_863
-; CHECK-RV64-NEXT: j .LBB61_357
+; CHECK-RV64-NEXT: j .LBB61_356
; CHECK-RV64-NEXT: .LBB61_863: # %cond.load1381
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17540,9 +15870,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 36
-; CHECK-RV64-NEXT: bltz a2, .LBB61_864
-; CHECK-RV64-NEXT: j .LBB61_358
+; CHECK-RV64-NEXT: j .LBB61_357
; CHECK-RV64-NEXT: .LBB61_864: # %cond.load1385
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17553,9 +15881,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 35
-; CHECK-RV64-NEXT: bltz a2, .LBB61_865
-; CHECK-RV64-NEXT: j .LBB61_359
+; CHECK-RV64-NEXT: j .LBB61_358
; CHECK-RV64-NEXT: .LBB61_865: # %cond.load1389
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17566,9 +15892,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 34
-; CHECK-RV64-NEXT: bltz a2, .LBB61_866
-; CHECK-RV64-NEXT: j .LBB61_360
+; CHECK-RV64-NEXT: j .LBB61_359
; CHECK-RV64-NEXT: .LBB61_866: # %cond.load1393
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17579,9 +15903,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 33
-; CHECK-RV64-NEXT: bltz a2, .LBB61_867
-; CHECK-RV64-NEXT: j .LBB61_361
+; CHECK-RV64-NEXT: j .LBB61_360
; CHECK-RV64-NEXT: .LBB61_867: # %cond.load1397
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17592,9 +15914,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 32
-; CHECK-RV64-NEXT: bltz a2, .LBB61_868
-; CHECK-RV64-NEXT: j .LBB61_362
+; CHECK-RV64-NEXT: j .LBB61_361
; CHECK-RV64-NEXT: .LBB61_868: # %cond.load1401
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17605,9 +15925,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 31
-; CHECK-RV64-NEXT: bltz a2, .LBB61_869
-; CHECK-RV64-NEXT: j .LBB61_363
+; CHECK-RV64-NEXT: j .LBB61_362
; CHECK-RV64-NEXT: .LBB61_869: # %cond.load1405
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17618,9 +15936,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 30
-; CHECK-RV64-NEXT: bltz a2, .LBB61_870
-; CHECK-RV64-NEXT: j .LBB61_364
+; CHECK-RV64-NEXT: j .LBB61_363
; CHECK-RV64-NEXT: .LBB61_870: # %cond.load1409
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17631,9 +15947,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 29
-; CHECK-RV64-NEXT: bltz a2, .LBB61_871
-; CHECK-RV64-NEXT: j .LBB61_365
+; CHECK-RV64-NEXT: j .LBB61_364
; CHECK-RV64-NEXT: .LBB61_871: # %cond.load1413
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17644,9 +15958,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 28
-; CHECK-RV64-NEXT: bltz a2, .LBB61_872
-; CHECK-RV64-NEXT: j .LBB61_366
+; CHECK-RV64-NEXT: j .LBB61_365
; CHECK-RV64-NEXT: .LBB61_872: # %cond.load1417
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17657,9 +15969,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 27
-; CHECK-RV64-NEXT: bltz a2, .LBB61_873
-; CHECK-RV64-NEXT: j .LBB61_367
+; CHECK-RV64-NEXT: j .LBB61_366
; CHECK-RV64-NEXT: .LBB61_873: # %cond.load1421
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17670,9 +15980,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 26
-; CHECK-RV64-NEXT: bltz a2, .LBB61_874
-; CHECK-RV64-NEXT: j .LBB61_368
+; CHECK-RV64-NEXT: j .LBB61_367
; CHECK-RV64-NEXT: .LBB61_874: # %cond.load1425
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17683,9 +15991,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 25
-; CHECK-RV64-NEXT: bltz a2, .LBB61_875
-; CHECK-RV64-NEXT: j .LBB61_369
+; CHECK-RV64-NEXT: j .LBB61_368
; CHECK-RV64-NEXT: .LBB61_875: # %cond.load1429
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17696,9 +16002,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 24
-; CHECK-RV64-NEXT: bltz a2, .LBB61_876
-; CHECK-RV64-NEXT: j .LBB61_370
+; CHECK-RV64-NEXT: j .LBB61_369
; CHECK-RV64-NEXT: .LBB61_876: # %cond.load1433
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17709,9 +16013,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 23
-; CHECK-RV64-NEXT: bltz a2, .LBB61_877
-; CHECK-RV64-NEXT: j .LBB61_371
+; CHECK-RV64-NEXT: j .LBB61_370
; CHECK-RV64-NEXT: .LBB61_877: # %cond.load1437
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17722,9 +16024,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 22
-; CHECK-RV64-NEXT: bltz a2, .LBB61_878
-; CHECK-RV64-NEXT: j .LBB61_372
+; CHECK-RV64-NEXT: j .LBB61_371
; CHECK-RV64-NEXT: .LBB61_878: # %cond.load1441
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17735,9 +16035,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 21
-; CHECK-RV64-NEXT: bltz a2, .LBB61_879
-; CHECK-RV64-NEXT: j .LBB61_373
+; CHECK-RV64-NEXT: j .LBB61_372
; CHECK-RV64-NEXT: .LBB61_879: # %cond.load1445
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17748,9 +16046,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 20
-; CHECK-RV64-NEXT: bltz a2, .LBB61_880
-; CHECK-RV64-NEXT: j .LBB61_374
+; CHECK-RV64-NEXT: j .LBB61_373
; CHECK-RV64-NEXT: .LBB61_880: # %cond.load1449
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17761,9 +16057,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 19
-; CHECK-RV64-NEXT: bltz a2, .LBB61_881
-; CHECK-RV64-NEXT: j .LBB61_375
+; CHECK-RV64-NEXT: j .LBB61_374
; CHECK-RV64-NEXT: .LBB61_881: # %cond.load1453
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17774,9 +16068,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 18
-; CHECK-RV64-NEXT: bltz a2, .LBB61_882
-; CHECK-RV64-NEXT: j .LBB61_376
+; CHECK-RV64-NEXT: j .LBB61_375
; CHECK-RV64-NEXT: .LBB61_882: # %cond.load1457
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17787,9 +16079,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 17
-; CHECK-RV64-NEXT: bltz a2, .LBB61_883
-; CHECK-RV64-NEXT: j .LBB61_377
+; CHECK-RV64-NEXT: j .LBB61_376
; CHECK-RV64-NEXT: .LBB61_883: # %cond.load1461
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17800,9 +16090,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 16
-; CHECK-RV64-NEXT: bltz a2, .LBB61_884
-; CHECK-RV64-NEXT: j .LBB61_378
+; CHECK-RV64-NEXT: j .LBB61_377
; CHECK-RV64-NEXT: .LBB61_884: # %cond.load1465
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17813,9 +16101,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 15
-; CHECK-RV64-NEXT: bltz a2, .LBB61_885
-; CHECK-RV64-NEXT: j .LBB61_379
+; CHECK-RV64-NEXT: j .LBB61_378
; CHECK-RV64-NEXT: .LBB61_885: # %cond.load1469
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17826,9 +16112,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 14
-; CHECK-RV64-NEXT: bltz a2, .LBB61_886
-; CHECK-RV64-NEXT: j .LBB61_380
+; CHECK-RV64-NEXT: j .LBB61_379
; CHECK-RV64-NEXT: .LBB61_886: # %cond.load1473
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17839,9 +16123,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 13
-; CHECK-RV64-NEXT: bltz a2, .LBB61_887
-; CHECK-RV64-NEXT: j .LBB61_381
+; CHECK-RV64-NEXT: j .LBB61_380
; CHECK-RV64-NEXT: .LBB61_887: # %cond.load1477
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17852,9 +16134,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 12
-; CHECK-RV64-NEXT: bltz a2, .LBB61_888
-; CHECK-RV64-NEXT: j .LBB61_382
+; CHECK-RV64-NEXT: j .LBB61_381
; CHECK-RV64-NEXT: .LBB61_888: # %cond.load1481
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17865,9 +16145,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 11
-; CHECK-RV64-NEXT: bltz a2, .LBB61_889
-; CHECK-RV64-NEXT: j .LBB61_383
+; CHECK-RV64-NEXT: j .LBB61_382
; CHECK-RV64-NEXT: .LBB61_889: # %cond.load1485
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17878,9 +16156,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 10
-; CHECK-RV64-NEXT: bltz a2, .LBB61_890
-; CHECK-RV64-NEXT: j .LBB61_384
+; CHECK-RV64-NEXT: j .LBB61_383
; CHECK-RV64-NEXT: .LBB61_890: # %cond.load1489
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17891,9 +16167,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 9
-; CHECK-RV64-NEXT: bltz a2, .LBB61_891
-; CHECK-RV64-NEXT: j .LBB61_385
+; CHECK-RV64-NEXT: j .LBB61_384
; CHECK-RV64-NEXT: .LBB61_891: # %cond.load1493
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17904,9 +16178,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 8
-; CHECK-RV64-NEXT: bltz a2, .LBB61_892
-; CHECK-RV64-NEXT: j .LBB61_386
+; CHECK-RV64-NEXT: j .LBB61_385
; CHECK-RV64-NEXT: .LBB61_892: # %cond.load1497
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17917,9 +16189,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 7
-; CHECK-RV64-NEXT: bltz a2, .LBB61_893
-; CHECK-RV64-NEXT: j .LBB61_387
+; CHECK-RV64-NEXT: j .LBB61_386
; CHECK-RV64-NEXT: .LBB61_893: # %cond.load1501
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17930,9 +16200,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 6
-; CHECK-RV64-NEXT: bltz a2, .LBB61_894
-; CHECK-RV64-NEXT: j .LBB61_388
+; CHECK-RV64-NEXT: j .LBB61_387
; CHECK-RV64-NEXT: .LBB61_894: # %cond.load1505
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17943,9 +16211,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 5
-; CHECK-RV64-NEXT: bltz a2, .LBB61_895
-; CHECK-RV64-NEXT: j .LBB61_389
+; CHECK-RV64-NEXT: j .LBB61_388
; CHECK-RV64-NEXT: .LBB61_895: # %cond.load1509
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17956,9 +16222,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 4
-; CHECK-RV64-NEXT: bltz a2, .LBB61_896
-; CHECK-RV64-NEXT: j .LBB61_390
+; CHECK-RV64-NEXT: j .LBB61_389
; CHECK-RV64-NEXT: .LBB61_896: # %cond.load1513
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17969,9 +16233,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 3
-; CHECK-RV64-NEXT: bltz a2, .LBB61_897
-; CHECK-RV64-NEXT: j .LBB61_391
+; CHECK-RV64-NEXT: j .LBB61_390
; CHECK-RV64-NEXT: .LBB61_897: # %cond.load1517
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17982,11 +16244,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 2
-; CHECK-RV64-NEXT: bgez a2, .LBB61_1030
-; CHECK-RV64-NEXT: j .LBB61_392
-; CHECK-RV64-NEXT: .LBB61_1030: # %cond.load1517
-; CHECK-RV64-NEXT: j .LBB61_393
+; CHECK-RV64-NEXT: j .LBB61_391
; CHECK-RV64-NEXT: .LBB61_898: # %cond.load1529
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vmv.s.x v16, a1
@@ -17995,9 +16253,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 1
-; CHECK-RV64-NEXT: bnez a1, .LBB61_899
-; CHECK-RV64-NEXT: j .LBB61_397
+; CHECK-RV64-NEXT: j .LBB61_396
; CHECK-RV64-NEXT: .LBB61_899: # %cond.load1533
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18008,9 +16264,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 2
-; CHECK-RV64-NEXT: bnez a1, .LBB61_900
-; CHECK-RV64-NEXT: j .LBB61_398
+; CHECK-RV64-NEXT: j .LBB61_397
; CHECK-RV64-NEXT: .LBB61_900: # %cond.load1537
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18021,9 +16275,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 4
-; CHECK-RV64-NEXT: bnez a1, .LBB61_901
-; CHECK-RV64-NEXT: j .LBB61_399
+; CHECK-RV64-NEXT: j .LBB61_398
; CHECK-RV64-NEXT: .LBB61_901: # %cond.load1541
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18034,9 +16286,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 8
-; CHECK-RV64-NEXT: bnez a1, .LBB61_902
-; CHECK-RV64-NEXT: j .LBB61_400
+; CHECK-RV64-NEXT: j .LBB61_399
; CHECK-RV64-NEXT: .LBB61_902: # %cond.load1545
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18047,9 +16297,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 16
-; CHECK-RV64-NEXT: bnez a1, .LBB61_903
-; CHECK-RV64-NEXT: j .LBB61_401
+; CHECK-RV64-NEXT: j .LBB61_400
; CHECK-RV64-NEXT: .LBB61_903: # %cond.load1549
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18060,9 +16308,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 32
-; CHECK-RV64-NEXT: bnez a1, .LBB61_904
-; CHECK-RV64-NEXT: j .LBB61_402
+; CHECK-RV64-NEXT: j .LBB61_401
; CHECK-RV64-NEXT: .LBB61_904: # %cond.load1553
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18073,9 +16319,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 64
-; CHECK-RV64-NEXT: bnez a1, .LBB61_905
-; CHECK-RV64-NEXT: j .LBB61_403
+; CHECK-RV64-NEXT: j .LBB61_402
; CHECK-RV64-NEXT: .LBB61_905: # %cond.load1557
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18086,9 +16330,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 128
-; CHECK-RV64-NEXT: bnez a1, .LBB61_906
-; CHECK-RV64-NEXT: j .LBB61_404
+; CHECK-RV64-NEXT: j .LBB61_403
; CHECK-RV64-NEXT: .LBB61_906: # %cond.load1561
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18099,9 +16341,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 256
-; CHECK-RV64-NEXT: bnez a1, .LBB61_907
-; CHECK-RV64-NEXT: j .LBB61_405
+; CHECK-RV64-NEXT: j .LBB61_404
; CHECK-RV64-NEXT: .LBB61_907: # %cond.load1565
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18112,9 +16352,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 512
-; CHECK-RV64-NEXT: bnez a1, .LBB61_908
-; CHECK-RV64-NEXT: j .LBB61_406
+; CHECK-RV64-NEXT: j .LBB61_405
; CHECK-RV64-NEXT: .LBB61_908: # %cond.load1569
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18125,9 +16363,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a1, a2, 1024
-; CHECK-RV64-NEXT: bnez a1, .LBB61_909
-; CHECK-RV64-NEXT: j .LBB61_407
+; CHECK-RV64-NEXT: j .LBB61_406
; CHECK-RV64-NEXT: .LBB61_909: # %cond.load1573
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18138,9 +16374,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 52
-; CHECK-RV64-NEXT: bltz a1, .LBB61_910
-; CHECK-RV64-NEXT: j .LBB61_408
+; CHECK-RV64-NEXT: j .LBB61_407
; CHECK-RV64-NEXT: .LBB61_910: # %cond.load1577
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18151,9 +16385,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 51
-; CHECK-RV64-NEXT: bltz a1, .LBB61_911
-; CHECK-RV64-NEXT: j .LBB61_409
+; CHECK-RV64-NEXT: j .LBB61_408
; CHECK-RV64-NEXT: .LBB61_911: # %cond.load1581
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18164,9 +16396,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 50
-; CHECK-RV64-NEXT: bltz a1, .LBB61_912
-; CHECK-RV64-NEXT: j .LBB61_410
+; CHECK-RV64-NEXT: j .LBB61_409
; CHECK-RV64-NEXT: .LBB61_912: # %cond.load1585
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18177,9 +16407,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 49
-; CHECK-RV64-NEXT: bltz a1, .LBB61_913
-; CHECK-RV64-NEXT: j .LBB61_411
+; CHECK-RV64-NEXT: j .LBB61_410
; CHECK-RV64-NEXT: .LBB61_913: # %cond.load1589
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18190,9 +16418,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 48
-; CHECK-RV64-NEXT: bltz a1, .LBB61_914
-; CHECK-RV64-NEXT: j .LBB61_412
+; CHECK-RV64-NEXT: j .LBB61_411
; CHECK-RV64-NEXT: .LBB61_914: # %cond.load1593
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18203,9 +16429,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 47
-; CHECK-RV64-NEXT: bltz a1, .LBB61_915
-; CHECK-RV64-NEXT: j .LBB61_413
+; CHECK-RV64-NEXT: j .LBB61_412
; CHECK-RV64-NEXT: .LBB61_915: # %cond.load1597
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18216,9 +16440,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 46
-; CHECK-RV64-NEXT: bltz a1, .LBB61_916
-; CHECK-RV64-NEXT: j .LBB61_414
+; CHECK-RV64-NEXT: j .LBB61_413
; CHECK-RV64-NEXT: .LBB61_916: # %cond.load1601
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18229,9 +16451,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 45
-; CHECK-RV64-NEXT: bltz a1, .LBB61_917
-; CHECK-RV64-NEXT: j .LBB61_415
+; CHECK-RV64-NEXT: j .LBB61_414
; CHECK-RV64-NEXT: .LBB61_917: # %cond.load1605
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18242,9 +16462,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 44
-; CHECK-RV64-NEXT: bltz a1, .LBB61_918
-; CHECK-RV64-NEXT: j .LBB61_416
+; CHECK-RV64-NEXT: j .LBB61_415
; CHECK-RV64-NEXT: .LBB61_918: # %cond.load1609
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18255,9 +16473,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 43
-; CHECK-RV64-NEXT: bltz a1, .LBB61_919
-; CHECK-RV64-NEXT: j .LBB61_417
+; CHECK-RV64-NEXT: j .LBB61_416
; CHECK-RV64-NEXT: .LBB61_919: # %cond.load1613
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18268,9 +16484,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 42
-; CHECK-RV64-NEXT: bltz a1, .LBB61_920
-; CHECK-RV64-NEXT: j .LBB61_418
+; CHECK-RV64-NEXT: j .LBB61_417
; CHECK-RV64-NEXT: .LBB61_920: # %cond.load1617
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18281,9 +16495,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 41
-; CHECK-RV64-NEXT: bltz a1, .LBB61_921
-; CHECK-RV64-NEXT: j .LBB61_419
+; CHECK-RV64-NEXT: j .LBB61_418
; CHECK-RV64-NEXT: .LBB61_921: # %cond.load1621
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18294,9 +16506,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 40
-; CHECK-RV64-NEXT: bltz a1, .LBB61_922
-; CHECK-RV64-NEXT: j .LBB61_420
+; CHECK-RV64-NEXT: j .LBB61_419
; CHECK-RV64-NEXT: .LBB61_922: # %cond.load1625
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18307,9 +16517,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 39
-; CHECK-RV64-NEXT: bltz a1, .LBB61_923
-; CHECK-RV64-NEXT: j .LBB61_421
+; CHECK-RV64-NEXT: j .LBB61_420
; CHECK-RV64-NEXT: .LBB61_923: # %cond.load1629
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18320,9 +16528,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 38
-; CHECK-RV64-NEXT: bltz a1, .LBB61_924
-; CHECK-RV64-NEXT: j .LBB61_422
+; CHECK-RV64-NEXT: j .LBB61_421
; CHECK-RV64-NEXT: .LBB61_924: # %cond.load1633
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18333,9 +16539,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 37
-; CHECK-RV64-NEXT: bltz a1, .LBB61_925
-; CHECK-RV64-NEXT: j .LBB61_423
+; CHECK-RV64-NEXT: j .LBB61_422
; CHECK-RV64-NEXT: .LBB61_925: # %cond.load1637
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18346,9 +16550,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 36
-; CHECK-RV64-NEXT: bltz a1, .LBB61_926
-; CHECK-RV64-NEXT: j .LBB61_424
+; CHECK-RV64-NEXT: j .LBB61_423
; CHECK-RV64-NEXT: .LBB61_926: # %cond.load1641
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18359,9 +16561,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 35
-; CHECK-RV64-NEXT: bltz a1, .LBB61_927
-; CHECK-RV64-NEXT: j .LBB61_425
+; CHECK-RV64-NEXT: j .LBB61_424
; CHECK-RV64-NEXT: .LBB61_927: # %cond.load1645
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18372,9 +16572,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 34
-; CHECK-RV64-NEXT: bltz a1, .LBB61_928
-; CHECK-RV64-NEXT: j .LBB61_426
+; CHECK-RV64-NEXT: j .LBB61_425
; CHECK-RV64-NEXT: .LBB61_928: # %cond.load1649
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18385,9 +16583,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 33
-; CHECK-RV64-NEXT: bltz a1, .LBB61_929
-; CHECK-RV64-NEXT: j .LBB61_427
+; CHECK-RV64-NEXT: j .LBB61_426
; CHECK-RV64-NEXT: .LBB61_929: # %cond.load1653
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18398,9 +16594,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 32
-; CHECK-RV64-NEXT: bltz a1, .LBB61_930
-; CHECK-RV64-NEXT: j .LBB61_428
+; CHECK-RV64-NEXT: j .LBB61_427
; CHECK-RV64-NEXT: .LBB61_930: # %cond.load1657
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18411,9 +16605,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 31
-; CHECK-RV64-NEXT: bltz a1, .LBB61_931
-; CHECK-RV64-NEXT: j .LBB61_429
+; CHECK-RV64-NEXT: j .LBB61_428
; CHECK-RV64-NEXT: .LBB61_931: # %cond.load1661
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18424,9 +16616,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 30
-; CHECK-RV64-NEXT: bltz a1, .LBB61_932
-; CHECK-RV64-NEXT: j .LBB61_430
+; CHECK-RV64-NEXT: j .LBB61_429
; CHECK-RV64-NEXT: .LBB61_932: # %cond.load1665
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18437,9 +16627,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 29
-; CHECK-RV64-NEXT: bltz a1, .LBB61_933
-; CHECK-RV64-NEXT: j .LBB61_431
+; CHECK-RV64-NEXT: j .LBB61_430
; CHECK-RV64-NEXT: .LBB61_933: # %cond.load1669
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18450,9 +16638,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 28
-; CHECK-RV64-NEXT: bltz a1, .LBB61_934
-; CHECK-RV64-NEXT: j .LBB61_432
+; CHECK-RV64-NEXT: j .LBB61_431
; CHECK-RV64-NEXT: .LBB61_934: # %cond.load1673
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18463,9 +16649,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 27
-; CHECK-RV64-NEXT: bltz a1, .LBB61_935
-; CHECK-RV64-NEXT: j .LBB61_433
+; CHECK-RV64-NEXT: j .LBB61_432
; CHECK-RV64-NEXT: .LBB61_935: # %cond.load1677
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18476,9 +16660,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 26
-; CHECK-RV64-NEXT: bltz a1, .LBB61_936
-; CHECK-RV64-NEXT: j .LBB61_434
+; CHECK-RV64-NEXT: j .LBB61_433
; CHECK-RV64-NEXT: .LBB61_936: # %cond.load1681
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18489,9 +16671,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 25
-; CHECK-RV64-NEXT: bltz a1, .LBB61_937
-; CHECK-RV64-NEXT: j .LBB61_435
+; CHECK-RV64-NEXT: j .LBB61_434
; CHECK-RV64-NEXT: .LBB61_937: # %cond.load1685
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18502,9 +16682,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 24
-; CHECK-RV64-NEXT: bltz a1, .LBB61_938
-; CHECK-RV64-NEXT: j .LBB61_436
+; CHECK-RV64-NEXT: j .LBB61_435
; CHECK-RV64-NEXT: .LBB61_938: # %cond.load1689
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18515,9 +16693,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 23
-; CHECK-RV64-NEXT: bltz a1, .LBB61_939
-; CHECK-RV64-NEXT: j .LBB61_437
+; CHECK-RV64-NEXT: j .LBB61_436
; CHECK-RV64-NEXT: .LBB61_939: # %cond.load1693
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18528,9 +16704,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 22
-; CHECK-RV64-NEXT: bltz a1, .LBB61_940
-; CHECK-RV64-NEXT: j .LBB61_438
+; CHECK-RV64-NEXT: j .LBB61_437
; CHECK-RV64-NEXT: .LBB61_940: # %cond.load1697
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18541,9 +16715,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 21
-; CHECK-RV64-NEXT: bltz a1, .LBB61_941
-; CHECK-RV64-NEXT: j .LBB61_439
+; CHECK-RV64-NEXT: j .LBB61_438
; CHECK-RV64-NEXT: .LBB61_941: # %cond.load1701
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18554,9 +16726,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 20
-; CHECK-RV64-NEXT: bltz a1, .LBB61_942
-; CHECK-RV64-NEXT: j .LBB61_440
+; CHECK-RV64-NEXT: j .LBB61_439
; CHECK-RV64-NEXT: .LBB61_942: # %cond.load1705
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18567,9 +16737,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 19
-; CHECK-RV64-NEXT: bltz a1, .LBB61_943
-; CHECK-RV64-NEXT: j .LBB61_441
+; CHECK-RV64-NEXT: j .LBB61_440
; CHECK-RV64-NEXT: .LBB61_943: # %cond.load1709
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18580,9 +16748,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 18
-; CHECK-RV64-NEXT: bltz a1, .LBB61_944
-; CHECK-RV64-NEXT: j .LBB61_442
+; CHECK-RV64-NEXT: j .LBB61_441
; CHECK-RV64-NEXT: .LBB61_944: # %cond.load1713
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18593,9 +16759,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 17
-; CHECK-RV64-NEXT: bltz a1, .LBB61_945
-; CHECK-RV64-NEXT: j .LBB61_443
+; CHECK-RV64-NEXT: j .LBB61_442
; CHECK-RV64-NEXT: .LBB61_945: # %cond.load1717
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18606,9 +16770,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 16
-; CHECK-RV64-NEXT: bltz a1, .LBB61_946
-; CHECK-RV64-NEXT: j .LBB61_444
+; CHECK-RV64-NEXT: j .LBB61_443
; CHECK-RV64-NEXT: .LBB61_946: # %cond.load1721
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18619,9 +16781,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 15
-; CHECK-RV64-NEXT: bltz a1, .LBB61_947
-; CHECK-RV64-NEXT: j .LBB61_445
+; CHECK-RV64-NEXT: j .LBB61_444
; CHECK-RV64-NEXT: .LBB61_947: # %cond.load1725
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18632,9 +16792,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 14
-; CHECK-RV64-NEXT: bltz a1, .LBB61_948
-; CHECK-RV64-NEXT: j .LBB61_446
+; CHECK-RV64-NEXT: j .LBB61_445
; CHECK-RV64-NEXT: .LBB61_948: # %cond.load1729
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18645,9 +16803,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 13
-; CHECK-RV64-NEXT: bltz a1, .LBB61_949
-; CHECK-RV64-NEXT: j .LBB61_447
+; CHECK-RV64-NEXT: j .LBB61_446
; CHECK-RV64-NEXT: .LBB61_949: # %cond.load1733
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18658,9 +16814,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 12
-; CHECK-RV64-NEXT: bltz a1, .LBB61_950
-; CHECK-RV64-NEXT: j .LBB61_448
+; CHECK-RV64-NEXT: j .LBB61_447
; CHECK-RV64-NEXT: .LBB61_950: # %cond.load1737
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18671,9 +16825,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 11
-; CHECK-RV64-NEXT: bltz a1, .LBB61_951
-; CHECK-RV64-NEXT: j .LBB61_449
+; CHECK-RV64-NEXT: j .LBB61_448
; CHECK-RV64-NEXT: .LBB61_951: # %cond.load1741
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18684,9 +16836,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 10
-; CHECK-RV64-NEXT: bltz a1, .LBB61_952
-; CHECK-RV64-NEXT: j .LBB61_450
+; CHECK-RV64-NEXT: j .LBB61_449
; CHECK-RV64-NEXT: .LBB61_952: # %cond.load1745
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18697,9 +16847,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 9
-; CHECK-RV64-NEXT: bltz a1, .LBB61_953
-; CHECK-RV64-NEXT: j .LBB61_451
+; CHECK-RV64-NEXT: j .LBB61_450
; CHECK-RV64-NEXT: .LBB61_953: # %cond.load1749
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18710,9 +16858,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 8
-; CHECK-RV64-NEXT: bltz a1, .LBB61_954
-; CHECK-RV64-NEXT: j .LBB61_452
+; CHECK-RV64-NEXT: j .LBB61_451
; CHECK-RV64-NEXT: .LBB61_954: # %cond.load1753
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18723,9 +16869,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 7
-; CHECK-RV64-NEXT: bltz a1, .LBB61_955
-; CHECK-RV64-NEXT: j .LBB61_453
+; CHECK-RV64-NEXT: j .LBB61_452
; CHECK-RV64-NEXT: .LBB61_955: # %cond.load1757
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18736,9 +16880,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 6
-; CHECK-RV64-NEXT: bltz a1, .LBB61_956
-; CHECK-RV64-NEXT: j .LBB61_454
+; CHECK-RV64-NEXT: j .LBB61_453
; CHECK-RV64-NEXT: .LBB61_956: # %cond.load1761
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18749,9 +16891,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 5
-; CHECK-RV64-NEXT: bltz a1, .LBB61_957
-; CHECK-RV64-NEXT: j .LBB61_455
+; CHECK-RV64-NEXT: j .LBB61_454
; CHECK-RV64-NEXT: .LBB61_957: # %cond.load1765
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18762,9 +16902,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 4
-; CHECK-RV64-NEXT: bltz a1, .LBB61_958
-; CHECK-RV64-NEXT: j .LBB61_456
+; CHECK-RV64-NEXT: j .LBB61_455
; CHECK-RV64-NEXT: .LBB61_958: # %cond.load1769
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18775,9 +16913,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 3
-; CHECK-RV64-NEXT: bltz a1, .LBB61_959
-; CHECK-RV64-NEXT: j .LBB61_457
+; CHECK-RV64-NEXT: j .LBB61_456
; CHECK-RV64-NEXT: .LBB61_959: # %cond.load1773
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18788,11 +16924,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a1, a2, 2
-; CHECK-RV64-NEXT: bgez a1, .LBB61_1031
-; CHECK-RV64-NEXT: j .LBB61_458
-; CHECK-RV64-NEXT: .LBB61_1031: # %cond.load1773
-; CHECK-RV64-NEXT: j .LBB61_459
+; CHECK-RV64-NEXT: j .LBB61_457
; CHECK-RV64-NEXT: .LBB61_960: # %cond.load1785
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vmv.s.x v16, a2
@@ -18801,9 +16933,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 1
-; CHECK-RV64-NEXT: bnez a2, .LBB61_961
-; CHECK-RV64-NEXT: j .LBB61_463
+; CHECK-RV64-NEXT: j .LBB61_462
; CHECK-RV64-NEXT: .LBB61_961: # %cond.load1789
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18814,9 +16944,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 2
-; CHECK-RV64-NEXT: bnez a2, .LBB61_962
-; CHECK-RV64-NEXT: j .LBB61_464
+; CHECK-RV64-NEXT: j .LBB61_463
; CHECK-RV64-NEXT: .LBB61_962: # %cond.load1793
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18827,9 +16955,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 4
-; CHECK-RV64-NEXT: bnez a2, .LBB61_963
-; CHECK-RV64-NEXT: j .LBB61_465
+; CHECK-RV64-NEXT: j .LBB61_464
; CHECK-RV64-NEXT: .LBB61_963: # %cond.load1797
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18840,9 +16966,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 8
-; CHECK-RV64-NEXT: bnez a2, .LBB61_964
-; CHECK-RV64-NEXT: j .LBB61_466
+; CHECK-RV64-NEXT: j .LBB61_465
; CHECK-RV64-NEXT: .LBB61_964: # %cond.load1801
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18853,9 +16977,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 16
-; CHECK-RV64-NEXT: bnez a2, .LBB61_965
-; CHECK-RV64-NEXT: j .LBB61_467
+; CHECK-RV64-NEXT: j .LBB61_466
; CHECK-RV64-NEXT: .LBB61_965: # %cond.load1805
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18866,9 +16988,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 32
-; CHECK-RV64-NEXT: bnez a2, .LBB61_966
-; CHECK-RV64-NEXT: j .LBB61_468
+; CHECK-RV64-NEXT: j .LBB61_467
; CHECK-RV64-NEXT: .LBB61_966: # %cond.load1809
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18879,9 +16999,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 64
-; CHECK-RV64-NEXT: bnez a2, .LBB61_967
-; CHECK-RV64-NEXT: j .LBB61_469
+; CHECK-RV64-NEXT: j .LBB61_468
; CHECK-RV64-NEXT: .LBB61_967: # %cond.load1813
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18892,9 +17010,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 128
-; CHECK-RV64-NEXT: bnez a2, .LBB61_968
-; CHECK-RV64-NEXT: j .LBB61_470
+; CHECK-RV64-NEXT: j .LBB61_469
; CHECK-RV64-NEXT: .LBB61_968: # %cond.load1817
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18905,9 +17021,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 256
-; CHECK-RV64-NEXT: bnez a2, .LBB61_969
-; CHECK-RV64-NEXT: j .LBB61_471
+; CHECK-RV64-NEXT: j .LBB61_470
; CHECK-RV64-NEXT: .LBB61_969: # %cond.load1821
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18918,9 +17032,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 512
-; CHECK-RV64-NEXT: bnez a2, .LBB61_970
-; CHECK-RV64-NEXT: j .LBB61_472
+; CHECK-RV64-NEXT: j .LBB61_471
; CHECK-RV64-NEXT: .LBB61_970: # %cond.load1825
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18931,9 +17043,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: andi a2, a1, 1024
-; CHECK-RV64-NEXT: bnez a2, .LBB61_971
-; CHECK-RV64-NEXT: j .LBB61_473
+; CHECK-RV64-NEXT: j .LBB61_472
; CHECK-RV64-NEXT: .LBB61_971: # %cond.load1829
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18944,9 +17054,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 52
-; CHECK-RV64-NEXT: bltz a2, .LBB61_972
-; CHECK-RV64-NEXT: j .LBB61_474
+; CHECK-RV64-NEXT: j .LBB61_473
; CHECK-RV64-NEXT: .LBB61_972: # %cond.load1833
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18957,9 +17065,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 51
-; CHECK-RV64-NEXT: bltz a2, .LBB61_973
-; CHECK-RV64-NEXT: j .LBB61_475
+; CHECK-RV64-NEXT: j .LBB61_474
; CHECK-RV64-NEXT: .LBB61_973: # %cond.load1837
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18970,9 +17076,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 50
-; CHECK-RV64-NEXT: bltz a2, .LBB61_974
-; CHECK-RV64-NEXT: j .LBB61_476
+; CHECK-RV64-NEXT: j .LBB61_475
; CHECK-RV64-NEXT: .LBB61_974: # %cond.load1841
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18983,9 +17087,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 49
-; CHECK-RV64-NEXT: bltz a2, .LBB61_975
-; CHECK-RV64-NEXT: j .LBB61_477
+; CHECK-RV64-NEXT: j .LBB61_476
; CHECK-RV64-NEXT: .LBB61_975: # %cond.load1845
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -18996,9 +17098,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 48
-; CHECK-RV64-NEXT: bltz a2, .LBB61_976
-; CHECK-RV64-NEXT: j .LBB61_478
+; CHECK-RV64-NEXT: j .LBB61_477
; CHECK-RV64-NEXT: .LBB61_976: # %cond.load1849
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19009,9 +17109,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 47
-; CHECK-RV64-NEXT: bltz a2, .LBB61_977
-; CHECK-RV64-NEXT: j .LBB61_479
+; CHECK-RV64-NEXT: j .LBB61_478
; CHECK-RV64-NEXT: .LBB61_977: # %cond.load1853
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19022,9 +17120,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 46
-; CHECK-RV64-NEXT: bltz a2, .LBB61_978
-; CHECK-RV64-NEXT: j .LBB61_480
+; CHECK-RV64-NEXT: j .LBB61_479
; CHECK-RV64-NEXT: .LBB61_978: # %cond.load1857
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19035,9 +17131,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 45
-; CHECK-RV64-NEXT: bltz a2, .LBB61_979
-; CHECK-RV64-NEXT: j .LBB61_481
+; CHECK-RV64-NEXT: j .LBB61_480
; CHECK-RV64-NEXT: .LBB61_979: # %cond.load1861
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19048,9 +17142,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 44
-; CHECK-RV64-NEXT: bltz a2, .LBB61_980
-; CHECK-RV64-NEXT: j .LBB61_482
+; CHECK-RV64-NEXT: j .LBB61_481
; CHECK-RV64-NEXT: .LBB61_980: # %cond.load1865
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19061,9 +17153,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 43
-; CHECK-RV64-NEXT: bltz a2, .LBB61_981
-; CHECK-RV64-NEXT: j .LBB61_483
+; CHECK-RV64-NEXT: j .LBB61_482
; CHECK-RV64-NEXT: .LBB61_981: # %cond.load1869
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19074,9 +17164,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 42
-; CHECK-RV64-NEXT: bltz a2, .LBB61_982
-; CHECK-RV64-NEXT: j .LBB61_484
+; CHECK-RV64-NEXT: j .LBB61_483
; CHECK-RV64-NEXT: .LBB61_982: # %cond.load1873
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19087,9 +17175,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 41
-; CHECK-RV64-NEXT: bltz a2, .LBB61_983
-; CHECK-RV64-NEXT: j .LBB61_485
+; CHECK-RV64-NEXT: j .LBB61_484
; CHECK-RV64-NEXT: .LBB61_983: # %cond.load1877
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19100,9 +17186,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 40
-; CHECK-RV64-NEXT: bltz a2, .LBB61_984
-; CHECK-RV64-NEXT: j .LBB61_486
+; CHECK-RV64-NEXT: j .LBB61_485
; CHECK-RV64-NEXT: .LBB61_984: # %cond.load1881
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19113,9 +17197,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 39
-; CHECK-RV64-NEXT: bltz a2, .LBB61_985
-; CHECK-RV64-NEXT: j .LBB61_487
+; CHECK-RV64-NEXT: j .LBB61_486
; CHECK-RV64-NEXT: .LBB61_985: # %cond.load1885
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19126,9 +17208,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 38
-; CHECK-RV64-NEXT: bltz a2, .LBB61_986
-; CHECK-RV64-NEXT: j .LBB61_488
+; CHECK-RV64-NEXT: j .LBB61_487
; CHECK-RV64-NEXT: .LBB61_986: # %cond.load1889
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19139,9 +17219,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 37
-; CHECK-RV64-NEXT: bltz a2, .LBB61_987
-; CHECK-RV64-NEXT: j .LBB61_489
+; CHECK-RV64-NEXT: j .LBB61_488
; CHECK-RV64-NEXT: .LBB61_987: # %cond.load1893
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19152,9 +17230,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 36
-; CHECK-RV64-NEXT: bltz a2, .LBB61_988
-; CHECK-RV64-NEXT: j .LBB61_490
+; CHECK-RV64-NEXT: j .LBB61_489
; CHECK-RV64-NEXT: .LBB61_988: # %cond.load1897
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19165,9 +17241,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 35
-; CHECK-RV64-NEXT: bltz a2, .LBB61_989
-; CHECK-RV64-NEXT: j .LBB61_491
+; CHECK-RV64-NEXT: j .LBB61_490
; CHECK-RV64-NEXT: .LBB61_989: # %cond.load1901
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19178,9 +17252,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 34
-; CHECK-RV64-NEXT: bltz a2, .LBB61_990
-; CHECK-RV64-NEXT: j .LBB61_492
+; CHECK-RV64-NEXT: j .LBB61_491
; CHECK-RV64-NEXT: .LBB61_990: # %cond.load1905
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19191,9 +17263,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 33
-; CHECK-RV64-NEXT: bltz a2, .LBB61_991
-; CHECK-RV64-NEXT: j .LBB61_493
+; CHECK-RV64-NEXT: j .LBB61_492
; CHECK-RV64-NEXT: .LBB61_991: # %cond.load1909
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19204,9 +17274,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 32
-; CHECK-RV64-NEXT: bltz a2, .LBB61_992
-; CHECK-RV64-NEXT: j .LBB61_494
+; CHECK-RV64-NEXT: j .LBB61_493
; CHECK-RV64-NEXT: .LBB61_992: # %cond.load1913
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19217,9 +17285,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 31
-; CHECK-RV64-NEXT: bltz a2, .LBB61_993
-; CHECK-RV64-NEXT: j .LBB61_495
+; CHECK-RV64-NEXT: j .LBB61_494
; CHECK-RV64-NEXT: .LBB61_993: # %cond.load1917
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19230,9 +17296,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 30
-; CHECK-RV64-NEXT: bltz a2, .LBB61_994
-; CHECK-RV64-NEXT: j .LBB61_496
+; CHECK-RV64-NEXT: j .LBB61_495
; CHECK-RV64-NEXT: .LBB61_994: # %cond.load1921
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19243,9 +17307,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 29
-; CHECK-RV64-NEXT: bltz a2, .LBB61_995
-; CHECK-RV64-NEXT: j .LBB61_497
+; CHECK-RV64-NEXT: j .LBB61_496
; CHECK-RV64-NEXT: .LBB61_995: # %cond.load1925
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19256,9 +17318,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 28
-; CHECK-RV64-NEXT: bltz a2, .LBB61_996
-; CHECK-RV64-NEXT: j .LBB61_498
+; CHECK-RV64-NEXT: j .LBB61_497
; CHECK-RV64-NEXT: .LBB61_996: # %cond.load1929
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19269,9 +17329,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 27
-; CHECK-RV64-NEXT: bltz a2, .LBB61_997
-; CHECK-RV64-NEXT: j .LBB61_499
+; CHECK-RV64-NEXT: j .LBB61_498
; CHECK-RV64-NEXT: .LBB61_997: # %cond.load1933
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19282,9 +17340,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 26
-; CHECK-RV64-NEXT: bltz a2, .LBB61_998
-; CHECK-RV64-NEXT: j .LBB61_500
+; CHECK-RV64-NEXT: j .LBB61_499
; CHECK-RV64-NEXT: .LBB61_998: # %cond.load1937
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19295,9 +17351,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 25
-; CHECK-RV64-NEXT: bltz a2, .LBB61_999
-; CHECK-RV64-NEXT: j .LBB61_501
+; CHECK-RV64-NEXT: j .LBB61_500
; CHECK-RV64-NEXT: .LBB61_999: # %cond.load1941
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19308,9 +17362,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 24
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1000
-; CHECK-RV64-NEXT: j .LBB61_502
+; CHECK-RV64-NEXT: j .LBB61_501
; CHECK-RV64-NEXT: .LBB61_1000: # %cond.load1945
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19321,9 +17373,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 23
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1001
-; CHECK-RV64-NEXT: j .LBB61_503
+; CHECK-RV64-NEXT: j .LBB61_502
; CHECK-RV64-NEXT: .LBB61_1001: # %cond.load1949
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19334,9 +17384,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 22
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1002
-; CHECK-RV64-NEXT: j .LBB61_504
+; CHECK-RV64-NEXT: j .LBB61_503
; CHECK-RV64-NEXT: .LBB61_1002: # %cond.load1953
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19347,9 +17395,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 21
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1003
-; CHECK-RV64-NEXT: j .LBB61_505
+; CHECK-RV64-NEXT: j .LBB61_504
; CHECK-RV64-NEXT: .LBB61_1003: # %cond.load1957
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19360,9 +17406,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 20
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1004
-; CHECK-RV64-NEXT: j .LBB61_506
+; CHECK-RV64-NEXT: j .LBB61_505
; CHECK-RV64-NEXT: .LBB61_1004: # %cond.load1961
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19373,9 +17417,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 19
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1005
-; CHECK-RV64-NEXT: j .LBB61_507
+; CHECK-RV64-NEXT: j .LBB61_506
; CHECK-RV64-NEXT: .LBB61_1005: # %cond.load1965
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19386,9 +17428,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 18
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1006
-; CHECK-RV64-NEXT: j .LBB61_508
+; CHECK-RV64-NEXT: j .LBB61_507
; CHECK-RV64-NEXT: .LBB61_1006: # %cond.load1969
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19399,9 +17439,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 17
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1007
-; CHECK-RV64-NEXT: j .LBB61_509
+; CHECK-RV64-NEXT: j .LBB61_508
; CHECK-RV64-NEXT: .LBB61_1007: # %cond.load1973
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19412,9 +17450,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 16
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1008
-; CHECK-RV64-NEXT: j .LBB61_510
+; CHECK-RV64-NEXT: j .LBB61_509
; CHECK-RV64-NEXT: .LBB61_1008: # %cond.load1977
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19425,9 +17461,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 15
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1009
-; CHECK-RV64-NEXT: j .LBB61_511
+; CHECK-RV64-NEXT: j .LBB61_510
; CHECK-RV64-NEXT: .LBB61_1009: # %cond.load1981
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19438,9 +17472,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 14
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1010
-; CHECK-RV64-NEXT: j .LBB61_512
+; CHECK-RV64-NEXT: j .LBB61_511
; CHECK-RV64-NEXT: .LBB61_1010: # %cond.load1985
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19451,9 +17483,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 13
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1011
-; CHECK-RV64-NEXT: j .LBB61_513
+; CHECK-RV64-NEXT: j .LBB61_512
; CHECK-RV64-NEXT: .LBB61_1011: # %cond.load1989
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19464,9 +17494,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 12
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1012
-; CHECK-RV64-NEXT: j .LBB61_514
+; CHECK-RV64-NEXT: j .LBB61_513
; CHECK-RV64-NEXT: .LBB61_1012: # %cond.load1993
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19477,9 +17505,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 11
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1013
-; CHECK-RV64-NEXT: j .LBB61_515
+; CHECK-RV64-NEXT: j .LBB61_514
; CHECK-RV64-NEXT: .LBB61_1013: # %cond.load1997
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19490,9 +17516,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 10
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1014
-; CHECK-RV64-NEXT: j .LBB61_516
+; CHECK-RV64-NEXT: j .LBB61_515
; CHECK-RV64-NEXT: .LBB61_1014: # %cond.load2001
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19503,9 +17527,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 9
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1015
-; CHECK-RV64-NEXT: j .LBB61_517
+; CHECK-RV64-NEXT: j .LBB61_516
; CHECK-RV64-NEXT: .LBB61_1015: # %cond.load2005
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19516,9 +17538,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 8
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1016
-; CHECK-RV64-NEXT: j .LBB61_518
+; CHECK-RV64-NEXT: j .LBB61_517
; CHECK-RV64-NEXT: .LBB61_1016: # %cond.load2009
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19529,9 +17549,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 7
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1017
-; CHECK-RV64-NEXT: j .LBB61_519
+; CHECK-RV64-NEXT: j .LBB61_518
; CHECK-RV64-NEXT: .LBB61_1017: # %cond.load2013
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19542,9 +17560,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 6
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1018
-; CHECK-RV64-NEXT: j .LBB61_520
+; CHECK-RV64-NEXT: j .LBB61_519
; CHECK-RV64-NEXT: .LBB61_1018: # %cond.load2017
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19555,9 +17571,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 5
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1019
-; CHECK-RV64-NEXT: j .LBB61_521
+; CHECK-RV64-NEXT: j .LBB61_520
; CHECK-RV64-NEXT: .LBB61_1019: # %cond.load2021
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19568,9 +17582,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 4
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1020
-; CHECK-RV64-NEXT: j .LBB61_522
+; CHECK-RV64-NEXT: j .LBB61_521
; CHECK-RV64-NEXT: .LBB61_1020: # %cond.load2025
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19581,9 +17593,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 3
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1021
-; CHECK-RV64-NEXT: j .LBB61_523
+; CHECK-RV64-NEXT: j .LBB61_522
; CHECK-RV64-NEXT: .LBB61_1021: # %cond.load2029
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19594,9 +17604,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 2
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1022
-; CHECK-RV64-NEXT: j .LBB61_524
+; CHECK-RV64-NEXT: j .LBB61_523
; CHECK-RV64-NEXT: .LBB61_1022: # %cond.load2033
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -19607,9 +17615,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: slli a2, a1, 1
-; CHECK-RV64-NEXT: bltz a2, .LBB61_1023
-; CHECK-RV64-NEXT: j .LBB61_525
+; CHECK-RV64-NEXT: j .LBB61_524
; CHECK-RV64-NEXT: .LBB61_1023: # %cond.load2037
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 76590d47a3230..3b6e3c5c550f8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -85,17 +85,16 @@ define <2 x i8> @mgather_v2i8(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru)
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB1_3
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB1_1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB1_4
-; RV64ZVE32F-NEXT: .LBB1_2: # %else2
+; RV64ZVE32F-NEXT: # %bb.2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB1_3: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
-; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB1_2
+; RV64ZVE32F-NEXT: j .LBB1_1
; RV64ZVE32F-NEXT: .LBB1_4: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
@@ -472,7 +471,7 @@ define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i8> %passthru)
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB8_5
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB8_1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB8_6
; RV64ZVE32F-NEXT: .LBB8_2: # %else2
@@ -481,15 +480,14 @@ define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i8> %passthru)
; RV64ZVE32F-NEXT: .LBB8_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB8_8
-; RV64ZVE32F-NEXT: .LBB8_4: # %else8
+; RV64ZVE32F-NEXT: # %bb.4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB8_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB8_2
+; RV64ZVE32F-NEXT: j .LBB8_1
; RV64ZVE32F-NEXT: .LBB8_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
@@ -497,16 +495,14 @@ define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i8> %passthru)
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB8_3
+; RV64ZVE32F-NEXT: j .LBB8_2
; RV64ZVE32F-NEXT: .LBB8_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: andi a1, a1, 8
-; RV64ZVE32F-NEXT: beqz a1, .LBB8_4
+; RV64ZVE32F-NEXT: j .LBB8_3
; RV64ZVE32F-NEXT: .LBB8_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
@@ -596,7 +592,7 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru)
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB11_9
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB11_1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB11_10
; RV64ZVE32F-NEXT: .LBB11_2: # %else2
@@ -617,15 +613,14 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru)
; RV64ZVE32F-NEXT: .LBB11_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB11_16
-; RV64ZVE32F-NEXT: .LBB11_8: # %else20
+; RV64ZVE32F-NEXT: # %bb.8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB11_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB11_2
+; RV64ZVE32F-NEXT: j .LBB11_1
; RV64ZVE32F-NEXT: .LBB11_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
@@ -633,48 +628,42 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru)
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB11_3
+; RV64ZVE32F-NEXT: j .LBB11_2
; RV64ZVE32F-NEXT: .LBB11_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB11_4
+; RV64ZVE32F-NEXT: j .LBB11_3
; RV64ZVE32F-NEXT: .LBB11_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB11_5
+; RV64ZVE32F-NEXT: j .LBB11_4
; RV64ZVE32F-NEXT: .LBB11_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB11_6
+; RV64ZVE32F-NEXT: j .LBB11_5
; RV64ZVE32F-NEXT: .LBB11_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: beqz a2, .LBB11_7
+; RV64ZVE32F-NEXT: j .LBB11_6
; RV64ZVE32F-NEXT: .LBB11_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB11_8
+; RV64ZVE32F-NEXT: j .LBB11_7
; RV64ZVE32F-NEXT: .LBB11_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
@@ -736,7 +725,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB12_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB12_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB12_15
; RV64ZVE32F-NEXT: .LBB12_6: # %else8
@@ -745,7 +734,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8
; RV64ZVE32F-NEXT: .LBB12_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB12_9
-; RV64ZVE32F-NEXT: .LBB12_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -789,8 +778,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB12_6
+; RV64ZVE32F-NEXT: j .LBB12_5
; RV64ZVE32F-NEXT: .LBB12_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -800,8 +788,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB12_7
+; RV64ZVE32F-NEXT: j .LBB12_6
; RV64ZVE32F-NEXT: .LBB12_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -809,9 +796,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB12_8
-; RV64ZVE32F-NEXT: j .LBB12_9
+; RV64ZVE32F-NEXT: j .LBB12_7
%ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
%v = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> %m, <8 x i8> %passthru)
ret <8 x i8> %v
@@ -885,17 +870,16 @@ define <2 x i16> @mgather_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthr
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB14_3
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB14_1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB14_4
-; RV64ZVE32F-NEXT: .LBB14_2: # %else2
+; RV64ZVE32F-NEXT: # %bb.2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB14_3: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
-; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB14_2
+; RV64ZVE32F-NEXT: j .LBB14_1
; RV64ZVE32F-NEXT: .LBB14_4: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
@@ -1168,7 +1152,7 @@ define <4 x i16> @mgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i16> %passthr
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB19_5
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB19_1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB19_6
; RV64ZVE32F-NEXT: .LBB19_2: # %else2
@@ -1177,15 +1161,14 @@ define <4 x i16> @mgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i16> %passthr
; RV64ZVE32F-NEXT: .LBB19_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB19_8
-; RV64ZVE32F-NEXT: .LBB19_4: # %else8
+; RV64ZVE32F-NEXT: # %bb.4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB19_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB19_2
+; RV64ZVE32F-NEXT: j .LBB19_1
; RV64ZVE32F-NEXT: .LBB19_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
@@ -1193,16 +1176,14 @@ define <4 x i16> @mgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i16> %passthr
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB19_3
+; RV64ZVE32F-NEXT: j .LBB19_2
; RV64ZVE32F-NEXT: .LBB19_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: andi a1, a1, 8
-; RV64ZVE32F-NEXT: beqz a1, .LBB19_4
+; RV64ZVE32F-NEXT: j .LBB19_3
; RV64ZVE32F-NEXT: .LBB19_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lh a0, 0(a0)
@@ -1292,7 +1273,7 @@ define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i16> %passthr
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB22_9
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB22_1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB22_10
; RV64ZVE32F-NEXT: .LBB22_2: # %else2
@@ -1313,15 +1294,14 @@ define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i16> %passthr
; RV64ZVE32F-NEXT: .LBB22_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB22_16
-; RV64ZVE32F-NEXT: .LBB22_8: # %else20
+; RV64ZVE32F-NEXT: # %bb.8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB22_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB22_2
+; RV64ZVE32F-NEXT: j .LBB22_1
; RV64ZVE32F-NEXT: .LBB22_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
@@ -1329,48 +1309,42 @@ define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i16> %passthr
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB22_3
+; RV64ZVE32F-NEXT: j .LBB22_2
; RV64ZVE32F-NEXT: .LBB22_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB22_4
+; RV64ZVE32F-NEXT: j .LBB22_3
; RV64ZVE32F-NEXT: .LBB22_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB22_5
+; RV64ZVE32F-NEXT: j .LBB22_4
; RV64ZVE32F-NEXT: .LBB22_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB22_6
+; RV64ZVE32F-NEXT: j .LBB22_5
; RV64ZVE32F-NEXT: .LBB22_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: beqz a2, .LBB22_7
+; RV64ZVE32F-NEXT: j .LBB22_6
; RV64ZVE32F-NEXT: .LBB22_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB22_8
+; RV64ZVE32F-NEXT: j .LBB22_7
; RV64ZVE32F-NEXT: .LBB22_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: lh a0, 0(a0)
@@ -1437,7 +1411,7 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB23_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB23_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB23_15
; RV64ZVE32F-NEXT: .LBB23_6: # %else8
@@ -1446,7 +1420,7 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: .LBB23_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB23_9
-; RV64ZVE32F-NEXT: .LBB23_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -1498,8 +1472,7 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB23_6
+; RV64ZVE32F-NEXT: j .LBB23_5
; RV64ZVE32F-NEXT: .LBB23_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -1511,8 +1484,7 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB23_7
+; RV64ZVE32F-NEXT: j .LBB23_6
; RV64ZVE32F-NEXT: .LBB23_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -1523,9 +1495,7 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB23_8
-; RV64ZVE32F-NEXT: j .LBB23_9
+; RV64ZVE32F-NEXT: j .LBB23_7
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
ret <8 x i16> %v
@@ -1586,7 +1556,7 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB24_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB24_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB24_15
; RV64ZVE32F-NEXT: .LBB24_6: # %else8
@@ -1595,7 +1565,7 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: .LBB24_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB24_9
-; RV64ZVE32F-NEXT: .LBB24_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -1647,8 +1617,7 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB24_6
+; RV64ZVE32F-NEXT: j .LBB24_5
; RV64ZVE32F-NEXT: .LBB24_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -1660,8 +1629,7 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB24_7
+; RV64ZVE32F-NEXT: j .LBB24_6
; RV64ZVE32F-NEXT: .LBB24_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -1672,9 +1640,7 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB24_8
-; RV64ZVE32F-NEXT: j .LBB24_9
+; RV64ZVE32F-NEXT: j .LBB24_7
%eidxs = sext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
@@ -1736,7 +1702,7 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB25_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB25_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB25_15
; RV64ZVE32F-NEXT: .LBB25_6: # %else8
@@ -1745,7 +1711,7 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: .LBB25_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB25_9
-; RV64ZVE32F-NEXT: .LBB25_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -1801,8 +1767,7 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB25_6
+; RV64ZVE32F-NEXT: j .LBB25_5
; RV64ZVE32F-NEXT: .LBB25_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -1815,8 +1780,7 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB25_7
+; RV64ZVE32F-NEXT: j .LBB25_6
; RV64ZVE32F-NEXT: .LBB25_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -1828,9 +1792,7 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB25_8
-; RV64ZVE32F-NEXT: j .LBB25_9
+; RV64ZVE32F-NEXT: j .LBB25_7
%eidxs = zext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
@@ -1889,7 +1851,7 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB26_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB26_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB26_15
; RV64ZVE32F-NEXT: .LBB26_6: # %else8
@@ -1898,7 +1860,7 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: .LBB26_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB26_9
-; RV64ZVE32F-NEXT: .LBB26_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -1946,8 +1908,7 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB26_6
+; RV64ZVE32F-NEXT: j .LBB26_5
; RV64ZVE32F-NEXT: .LBB26_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -1958,8 +1919,7 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB26_7
+; RV64ZVE32F-NEXT: j .LBB26_6
; RV64ZVE32F-NEXT: .LBB26_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -1968,9 +1928,7 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB26_8
-; RV64ZVE32F-NEXT: j .LBB26_9
+; RV64ZVE32F-NEXT: j .LBB26_7
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
ret <8 x i16> %v
@@ -2044,17 +2002,16 @@ define <2 x i32> @mgather_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthr
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB28_3
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB28_1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB28_4
-; RV64ZVE32F-NEXT: .LBB28_2: # %else2
+; RV64ZVE32F-NEXT: # %bb.2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB28_3: # %cond.load
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
-; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB28_2
+; RV64ZVE32F-NEXT: j .LBB28_1
; RV64ZVE32F-NEXT: .LBB28_4: # %cond.load1
; RV64ZVE32F-NEXT: lw a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -2217,7 +2174,7 @@ define <4 x i32> @mgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i32> %passthr
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB31_5
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB31_1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB31_6
; RV64ZVE32F-NEXT: .LBB31_2: # %else2
@@ -2226,15 +2183,14 @@ define <4 x i32> @mgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i32> %passthr
; RV64ZVE32F-NEXT: .LBB31_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB31_8
-; RV64ZVE32F-NEXT: .LBB31_4: # %else8
+; RV64ZVE32F-NEXT: # %bb.4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB31_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB31_2
+; RV64ZVE32F-NEXT: j .LBB31_1
; RV64ZVE32F-NEXT: .LBB31_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
@@ -2242,16 +2198,14 @@ define <4 x i32> @mgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i32> %passthr
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB31_3
+; RV64ZVE32F-NEXT: j .LBB31_2
; RV64ZVE32F-NEXT: .LBB31_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: andi a1, a1, 8
-; RV64ZVE32F-NEXT: beqz a1, .LBB31_4
+; RV64ZVE32F-NEXT: j .LBB31_3
; RV64ZVE32F-NEXT: .LBB31_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lw a0, 0(a0)
@@ -2340,7 +2294,7 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB34_9
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB34_1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB34_10
; RV64ZVE32F-NEXT: .LBB34_2: # %else2
@@ -2361,15 +2315,14 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr
; RV64ZVE32F-NEXT: .LBB34_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB34_16
-; RV64ZVE32F-NEXT: .LBB34_8: # %else20
+; RV64ZVE32F-NEXT: # %bb.8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB34_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB34_2
+; RV64ZVE32F-NEXT: j .LBB34_1
; RV64ZVE32F-NEXT: .LBB34_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
@@ -2377,48 +2330,42 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB34_3
+; RV64ZVE32F-NEXT: j .LBB34_2
; RV64ZVE32F-NEXT: .LBB34_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB34_4
+; RV64ZVE32F-NEXT: j .LBB34_3
; RV64ZVE32F-NEXT: .LBB34_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB34_5
+; RV64ZVE32F-NEXT: j .LBB34_4
; RV64ZVE32F-NEXT: .LBB34_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB34_6
+; RV64ZVE32F-NEXT: j .LBB34_5
; RV64ZVE32F-NEXT: .LBB34_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: beqz a2, .LBB34_7
+; RV64ZVE32F-NEXT: j .LBB34_6
; RV64ZVE32F-NEXT: .LBB34_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB34_8
+; RV64ZVE32F-NEXT: j .LBB34_7
; RV64ZVE32F-NEXT: .LBB34_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: lw a0, 0(a0)
@@ -2484,7 +2431,7 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB35_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB35_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB35_15
; RV64ZVE32F-NEXT: .LBB35_6: # %else8
@@ -2493,7 +2440,7 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: .LBB35_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB35_9
-; RV64ZVE32F-NEXT: .LBB35_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -2545,8 +2492,7 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB35_6
+; RV64ZVE32F-NEXT: j .LBB35_5
; RV64ZVE32F-NEXT: .LBB35_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -2558,8 +2504,7 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB35_7
+; RV64ZVE32F-NEXT: j .LBB35_6
; RV64ZVE32F-NEXT: .LBB35_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -2570,9 +2515,7 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB35_8
-; RV64ZVE32F-NEXT: j .LBB35_9
+; RV64ZVE32F-NEXT: j .LBB35_7
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
@@ -2632,7 +2575,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB36_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB36_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB36_15
; RV64ZVE32F-NEXT: .LBB36_6: # %else8
@@ -2641,7 +2584,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: .LBB36_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB36_9
-; RV64ZVE32F-NEXT: .LBB36_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -2693,8 +2636,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB36_6
+; RV64ZVE32F-NEXT: j .LBB36_5
; RV64ZVE32F-NEXT: .LBB36_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -2706,8 +2648,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB36_7
+; RV64ZVE32F-NEXT: j .LBB36_6
; RV64ZVE32F-NEXT: .LBB36_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -2718,9 +2659,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB36_8
-; RV64ZVE32F-NEXT: j .LBB36_9
+; RV64ZVE32F-NEXT: j .LBB36_7
%eidxs = sext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
@@ -2784,7 +2723,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB37_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB37_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB37_15
; RV64ZVE32F-NEXT: .LBB37_6: # %else8
@@ -2793,7 +2732,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: .LBB37_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB37_9
-; RV64ZVE32F-NEXT: .LBB37_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -2849,8 +2788,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB37_6
+; RV64ZVE32F-NEXT: j .LBB37_5
; RV64ZVE32F-NEXT: .LBB37_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -2863,8 +2801,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB37_7
+; RV64ZVE32F-NEXT: j .LBB37_6
; RV64ZVE32F-NEXT: .LBB37_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -2876,9 +2813,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB37_8
-; RV64ZVE32F-NEXT: j .LBB37_9
+; RV64ZVE32F-NEXT: j .LBB37_7
%eidxs = zext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
@@ -2941,7 +2876,7 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB38_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB38_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB38_15
; RV64ZVE32F-NEXT: .LBB38_6: # %else8
@@ -2950,7 +2885,7 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: .LBB38_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB38_9
-; RV64ZVE32F-NEXT: .LBB38_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -3002,8 +2937,7 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB38_6
+; RV64ZVE32F-NEXT: j .LBB38_5
; RV64ZVE32F-NEXT: .LBB38_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -3015,8 +2949,7 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB38_7
+; RV64ZVE32F-NEXT: j .LBB38_6
; RV64ZVE32F-NEXT: .LBB38_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -3027,9 +2960,7 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB38_8
-; RV64ZVE32F-NEXT: j .LBB38_9
+; RV64ZVE32F-NEXT: j .LBB38_7
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
@@ -3091,7 +3022,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB39_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB39_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB39_15
; RV64ZVE32F-NEXT: .LBB39_6: # %else8
@@ -3100,7 +3031,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: .LBB39_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB39_9
-; RV64ZVE32F-NEXT: .LBB39_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -3152,8 +3083,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB39_6
+; RV64ZVE32F-NEXT: j .LBB39_5
; RV64ZVE32F-NEXT: .LBB39_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -3165,8 +3095,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB39_7
+; RV64ZVE32F-NEXT: j .LBB39_6
; RV64ZVE32F-NEXT: .LBB39_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -3177,9 +3106,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB39_8
-; RV64ZVE32F-NEXT: j .LBB39_9
+; RV64ZVE32F-NEXT: j .LBB39_7
%eidxs = sext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
@@ -3244,7 +3171,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB40_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB40_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB40_15
; RV64ZVE32F-NEXT: .LBB40_6: # %else8
@@ -3253,7 +3180,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: .LBB40_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB40_9
-; RV64ZVE32F-NEXT: .LBB40_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -3309,8 +3236,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB40_6
+; RV64ZVE32F-NEXT: j .LBB40_5
; RV64ZVE32F-NEXT: .LBB40_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -3323,8 +3249,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB40_7
+; RV64ZVE32F-NEXT: j .LBB40_6
; RV64ZVE32F-NEXT: .LBB40_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -3336,9 +3261,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB40_8
-; RV64ZVE32F-NEXT: j .LBB40_9
+; RV64ZVE32F-NEXT: j .LBB40_7
%eidxs = zext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
@@ -3395,7 +3318,7 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB41_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB41_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB41_15
; RV64ZVE32F-NEXT: .LBB41_6: # %else8
@@ -3404,7 +3327,7 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: .LBB41_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB41_9
-; RV64ZVE32F-NEXT: .LBB41_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -3452,8 +3375,7 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB41_6
+; RV64ZVE32F-NEXT: j .LBB41_5
; RV64ZVE32F-NEXT: .LBB41_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -3463,8 +3385,7 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB41_7
+; RV64ZVE32F-NEXT: j .LBB41_6
; RV64ZVE32F-NEXT: .LBB41_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
@@ -3473,9 +3394,7 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB41_8
-; RV64ZVE32F-NEXT: j .LBB41_9
+; RV64ZVE32F-NEXT: j .LBB41_7
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
@@ -3547,30 +3466,30 @@ define <2 x i64> @mgather_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %passthr
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a4, v0
; RV32ZVE32F-NEXT: andi a2, a4, 1
-; RV32ZVE32F-NEXT: beqz a2, .LBB43_3
+; RV32ZVE32F-NEXT: beqz a2, .LBB43_4
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a2, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
+; RV32ZVE32F-NEXT: .LBB43_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, a4, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB43_4
-; RV32ZVE32F-NEXT: .LBB43_2:
+; RV32ZVE32F-NEXT: bnez a4, .LBB43_5
+; RV32ZVE32F-NEXT: # %bb.3:
; RV32ZVE32F-NEXT: lw a4, 8(a1)
; RV32ZVE32F-NEXT: lw a1, 12(a1)
-; RV32ZVE32F-NEXT: j .LBB43_5
-; RV32ZVE32F-NEXT: .LBB43_3:
+; RV32ZVE32F-NEXT: j .LBB43_6
+; RV32ZVE32F-NEXT: .LBB43_4:
; RV32ZVE32F-NEXT: lw a2, 0(a1)
; RV32ZVE32F-NEXT: lw a3, 4(a1)
-; RV32ZVE32F-NEXT: andi a4, a4, 2
-; RV32ZVE32F-NEXT: beqz a4, .LBB43_2
-; RV32ZVE32F-NEXT: .LBB43_4: # %cond.load1
+; RV32ZVE32F-NEXT: j .LBB43_2
+; RV32ZVE32F-NEXT: .LBB43_5: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lw a4, 0(a1)
; RV32ZVE32F-NEXT: lw a1, 4(a1)
-; RV32ZVE32F-NEXT: .LBB43_5: # %else2
+; RV32ZVE32F-NEXT: .LBB43_6: # %else2
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -3620,56 +3539,56 @@ define <4 x i64> @mgather_v4i64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i64> %passthr
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a6, v0
; RV32ZVE32F-NEXT: andi a2, a6, 1
-; RV32ZVE32F-NEXT: beqz a2, .LBB44_5
+; RV32ZVE32F-NEXT: beqz a2, .LBB44_8
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a2, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
+; RV32ZVE32F-NEXT: .LBB44_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, a6, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB44_6
-; RV32ZVE32F-NEXT: .LBB44_2:
+; RV32ZVE32F-NEXT: bnez a4, .LBB44_9
+; RV32ZVE32F-NEXT: # %bb.3:
; RV32ZVE32F-NEXT: lw a4, 8(a1)
; RV32ZVE32F-NEXT: lw a5, 12(a1)
+; RV32ZVE32F-NEXT: .LBB44_4:
; RV32ZVE32F-NEXT: andi a7, a6, 4
-; RV32ZVE32F-NEXT: bnez a7, .LBB44_7
-; RV32ZVE32F-NEXT: .LBB44_3:
+; RV32ZVE32F-NEXT: bnez a7, .LBB44_10
+; RV32ZVE32F-NEXT: # %bb.5:
; RV32ZVE32F-NEXT: lw a7, 16(a1)
; RV32ZVE32F-NEXT: lw t0, 20(a1)
+; RV32ZVE32F-NEXT: .LBB44_6:
; RV32ZVE32F-NEXT: andi a6, a6, 8
-; RV32ZVE32F-NEXT: bnez a6, .LBB44_8
-; RV32ZVE32F-NEXT: .LBB44_4:
+; RV32ZVE32F-NEXT: bnez a6, .LBB44_11
+; RV32ZVE32F-NEXT: # %bb.7:
; RV32ZVE32F-NEXT: lw a6, 24(a1)
; RV32ZVE32F-NEXT: lw a1, 28(a1)
-; RV32ZVE32F-NEXT: j .LBB44_9
-; RV32ZVE32F-NEXT: .LBB44_5:
+; RV32ZVE32F-NEXT: j .LBB44_12
+; RV32ZVE32F-NEXT: .LBB44_8:
; RV32ZVE32F-NEXT: lw a2, 0(a1)
; RV32ZVE32F-NEXT: lw a3, 4(a1)
-; RV32ZVE32F-NEXT: andi a4, a6, 2
-; RV32ZVE32F-NEXT: beqz a4, .LBB44_2
-; RV32ZVE32F-NEXT: .LBB44_6: # %cond.load1
+; RV32ZVE32F-NEXT: j .LBB44_2
+; RV32ZVE32F-NEXT: .LBB44_9: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v9
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: andi a7, a6, 4
-; RV32ZVE32F-NEXT: beqz a7, .LBB44_3
-; RV32ZVE32F-NEXT: .LBB44_7: # %cond.load4
+; RV32ZVE32F-NEXT: j .LBB44_4
+; RV32ZVE32F-NEXT: .LBB44_10: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s t0, v9
; RV32ZVE32F-NEXT: lw a7, 0(t0)
; RV32ZVE32F-NEXT: lw t0, 4(t0)
-; RV32ZVE32F-NEXT: andi a6, a6, 8
-; RV32ZVE32F-NEXT: beqz a6, .LBB44_4
-; RV32ZVE32F-NEXT: .LBB44_8: # %cond.load7
+; RV32ZVE32F-NEXT: j .LBB44_6
+; RV32ZVE32F-NEXT: .LBB44_11: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lw a6, 0(a1)
; RV32ZVE32F-NEXT: lw a1, 4(a1)
-; RV32ZVE32F-NEXT: .LBB44_9: # %else8
+; RV32ZVE32F-NEXT: .LBB44_12: # %else8
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -3685,41 +3604,41 @@ define <4 x i64> @mgather_v4i64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i64> %passthr
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB44_5
+; RV64ZVE32F-NEXT: beqz a3, .LBB44_8
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: ld a3, 0(a3)
+; RV64ZVE32F-NEXT: .LBB44_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB44_6
-; RV64ZVE32F-NEXT: .LBB44_2:
+; RV64ZVE32F-NEXT: bnez a4, .LBB44_9
+; RV64ZVE32F-NEXT: # %bb.3:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
+; RV64ZVE32F-NEXT: .LBB44_4:
; RV64ZVE32F-NEXT: andi a6, a5, 4
-; RV64ZVE32F-NEXT: bnez a6, .LBB44_7
-; RV64ZVE32F-NEXT: .LBB44_3:
+; RV64ZVE32F-NEXT: bnez a6, .LBB44_10
+; RV64ZVE32F-NEXT: # %bb.5:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
+; RV64ZVE32F-NEXT: .LBB44_6:
; RV64ZVE32F-NEXT: andi a5, a5, 8
-; RV64ZVE32F-NEXT: bnez a5, .LBB44_8
-; RV64ZVE32F-NEXT: .LBB44_4:
+; RV64ZVE32F-NEXT: bnez a5, .LBB44_11
+; RV64ZVE32F-NEXT: # %bb.7:
; RV64ZVE32F-NEXT: ld a1, 24(a2)
-; RV64ZVE32F-NEXT: j .LBB44_9
-; RV64ZVE32F-NEXT: .LBB44_5:
+; RV64ZVE32F-NEXT: j .LBB44_12
+; RV64ZVE32F-NEXT: .LBB44_8:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: beqz a4, .LBB44_2
-; RV64ZVE32F-NEXT: .LBB44_6: # %cond.load1
+; RV64ZVE32F-NEXT: j .LBB44_2
+; RV64ZVE32F-NEXT: .LBB44_9: # %cond.load1
; RV64ZVE32F-NEXT: ld a4, 8(a1)
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: andi a6, a5, 4
-; RV64ZVE32F-NEXT: beqz a6, .LBB44_3
-; RV64ZVE32F-NEXT: .LBB44_7: # %cond.load4
+; RV64ZVE32F-NEXT: j .LBB44_4
+; RV64ZVE32F-NEXT: .LBB44_10: # %cond.load4
; RV64ZVE32F-NEXT: ld a6, 16(a1)
; RV64ZVE32F-NEXT: ld a6, 0(a6)
-; RV64ZVE32F-NEXT: andi a5, a5, 8
-; RV64ZVE32F-NEXT: beqz a5, .LBB44_4
-; RV64ZVE32F-NEXT: .LBB44_8: # %cond.load7
+; RV64ZVE32F-NEXT: j .LBB44_6
+; RV64ZVE32F-NEXT: .LBB44_11: # %cond.load7
; RV64ZVE32F-NEXT: ld a1, 24(a1)
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB44_9: # %else8
+; RV64ZVE32F-NEXT: .LBB44_12: # %else8
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -3860,82 +3779,82 @@ define <8 x i64> @mgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i64> %passthr
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a2, t0, 1
-; RV32ZVE32F-NEXT: beqz a2, .LBB47_7
+; RV32ZVE32F-NEXT: beqz a2, .LBB47_12
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a2, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
+; RV32ZVE32F-NEXT: .LBB47_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB47_8
-; RV32ZVE32F-NEXT: .LBB47_2:
+; RV32ZVE32F-NEXT: bnez a4, .LBB47_13
+; RV32ZVE32F-NEXT: # %bb.3:
; RV32ZVE32F-NEXT: lw a4, 8(a1)
; RV32ZVE32F-NEXT: lw a5, 12(a1)
+; RV32ZVE32F-NEXT: .LBB47_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB47_9
-; RV32ZVE32F-NEXT: .LBB47_3:
+; RV32ZVE32F-NEXT: bnez a6, .LBB47_14
+; RV32ZVE32F-NEXT: # %bb.5:
; RV32ZVE32F-NEXT: lw a6, 16(a1)
; RV32ZVE32F-NEXT: lw a7, 20(a1)
+; RV32ZVE32F-NEXT: .LBB47_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB47_10
-; RV32ZVE32F-NEXT: .LBB47_4:
+; RV32ZVE32F-NEXT: bnez t1, .LBB47_15
+; RV32ZVE32F-NEXT: # %bb.7:
; RV32ZVE32F-NEXT: lw t1, 24(a1)
; RV32ZVE32F-NEXT: lw t2, 28(a1)
+; RV32ZVE32F-NEXT: .LBB47_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB47_11
-; RV32ZVE32F-NEXT: .LBB47_5:
+; RV32ZVE32F-NEXT: bnez t3, .LBB47_16
+; RV32ZVE32F-NEXT: # %bb.9:
; RV32ZVE32F-NEXT: lw t3, 32(a1)
; RV32ZVE32F-NEXT: lw t4, 36(a1)
+; RV32ZVE32F-NEXT: .LBB47_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB47_12
-; RV32ZVE32F-NEXT: .LBB47_6:
+; RV32ZVE32F-NEXT: bnez t5, .LBB47_17
+; RV32ZVE32F-NEXT: # %bb.11:
; RV32ZVE32F-NEXT: lw t5, 40(a1)
; RV32ZVE32F-NEXT: lw t6, 44(a1)
-; RV32ZVE32F-NEXT: j .LBB47_13
-; RV32ZVE32F-NEXT: .LBB47_7:
+; RV32ZVE32F-NEXT: j .LBB47_18
+; RV32ZVE32F-NEXT: .LBB47_12:
; RV32ZVE32F-NEXT: lw a2, 0(a1)
; RV32ZVE32F-NEXT: lw a3, 4(a1)
-; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: beqz a4, .LBB47_2
-; RV32ZVE32F-NEXT: .LBB47_8: # %cond.load1
+; RV32ZVE32F-NEXT: j .LBB47_2
+; RV32ZVE32F-NEXT: .LBB47_13: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: beqz a6, .LBB47_3
-; RV32ZVE32F-NEXT: .LBB47_9: # %cond.load4
+; RV32ZVE32F-NEXT: j .LBB47_4
+; RV32ZVE32F-NEXT: .LBB47_14: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: beqz t1, .LBB47_4
-; RV32ZVE32F-NEXT: .LBB47_10: # %cond.load7
+; RV32ZVE32F-NEXT: j .LBB47_6
+; RV32ZVE32F-NEXT: .LBB47_15: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: beqz t3, .LBB47_5
-; RV32ZVE32F-NEXT: .LBB47_11: # %cond.load10
+; RV32ZVE32F-NEXT: j .LBB47_8
+; RV32ZVE32F-NEXT: .LBB47_16: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: beqz t5, .LBB47_6
-; RV32ZVE32F-NEXT: .LBB47_12: # %cond.load13
+; RV32ZVE32F-NEXT: j .LBB47_10
+; RV32ZVE32F-NEXT: .LBB47_17: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB47_13: # %else14
+; RV32ZVE32F-NEXT: .LBB47_18: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -3943,31 +3862,31 @@ define <8 x i64> @mgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i64> %passthr
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB47_16
-; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB47_22
+; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
+; RV32ZVE32F-NEXT: .LBB47_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB47_17
-; RV32ZVE32F-NEXT: .LBB47_15:
+; RV32ZVE32F-NEXT: bnez t0, .LBB47_23
+; RV32ZVE32F-NEXT: # %bb.21:
; RV32ZVE32F-NEXT: lw t0, 56(a1)
; RV32ZVE32F-NEXT: lw a1, 60(a1)
-; RV32ZVE32F-NEXT: j .LBB47_18
-; RV32ZVE32F-NEXT: .LBB47_16:
+; RV32ZVE32F-NEXT: j .LBB47_24
+; RV32ZVE32F-NEXT: .LBB47_22:
; RV32ZVE32F-NEXT: lw s0, 48(a1)
; RV32ZVE32F-NEXT: lw s1, 52(a1)
-; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: beqz t0, .LBB47_15
-; RV32ZVE32F-NEXT: .LBB47_17: # %cond.load19
+; RV32ZVE32F-NEXT: j .LBB47_20
+; RV32ZVE32F-NEXT: .LBB47_23: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lw t0, 0(a1)
; RV32ZVE32F-NEXT: lw a1, 4(a1)
-; RV32ZVE32F-NEXT: .LBB47_18: # %else20
+; RV32ZVE32F-NEXT: .LBB47_24: # %else20
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -3997,77 +3916,77 @@ define <8 x i64> @mgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i64> %passthr
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a6, v0
; RV64ZVE32F-NEXT: andi a3, a6, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB47_9
+; RV64ZVE32F-NEXT: beqz a3, .LBB47_16
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: ld a3, 0(a3)
+; RV64ZVE32F-NEXT: .LBB47_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a6, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB47_10
-; RV64ZVE32F-NEXT: .LBB47_2:
+; RV64ZVE32F-NEXT: bnez a4, .LBB47_17
+; RV64ZVE32F-NEXT: # %bb.3:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
+; RV64ZVE32F-NEXT: .LBB47_4:
; RV64ZVE32F-NEXT: andi a5, a6, 4
-; RV64ZVE32F-NEXT: bnez a5, .LBB47_11
-; RV64ZVE32F-NEXT: .LBB47_3:
+; RV64ZVE32F-NEXT: bnez a5, .LBB47_18
+; RV64ZVE32F-NEXT: # %bb.5:
; RV64ZVE32F-NEXT: ld a5, 16(a2)
+; RV64ZVE32F-NEXT: .LBB47_6:
; RV64ZVE32F-NEXT: andi a7, a6, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB47_12
-; RV64ZVE32F-NEXT: .LBB47_4:
+; RV64ZVE32F-NEXT: bnez a7, .LBB47_19
+; RV64ZVE32F-NEXT: # %bb.7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: .LBB47_8:
; RV64ZVE32F-NEXT: andi t0, a6, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB47_13
-; RV64ZVE32F-NEXT: .LBB47_5:
+; RV64ZVE32F-NEXT: bnez t0, .LBB47_20
+; RV64ZVE32F-NEXT: # %bb.9:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
+; RV64ZVE32F-NEXT: .LBB47_10:
; RV64ZVE32F-NEXT: andi t1, a6, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB47_14
-; RV64ZVE32F-NEXT: .LBB47_6:
+; RV64ZVE32F-NEXT: bnez t1, .LBB47_21
+; RV64ZVE32F-NEXT: # %bb.11:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
+; RV64ZVE32F-NEXT: .LBB47_12:
; RV64ZVE32F-NEXT: andi t2, a6, 64
-; RV64ZVE32F-NEXT: bnez t2, .LBB47_15
-; RV64ZVE32F-NEXT: .LBB47_7:
+; RV64ZVE32F-NEXT: bnez t2, .LBB47_22
+; RV64ZVE32F-NEXT: # %bb.13:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
+; RV64ZVE32F-NEXT: .LBB47_14:
; RV64ZVE32F-NEXT: andi a6, a6, -128
-; RV64ZVE32F-NEXT: bnez a6, .LBB47_16
-; RV64ZVE32F-NEXT: .LBB47_8:
+; RV64ZVE32F-NEXT: bnez a6, .LBB47_23
+; RV64ZVE32F-NEXT: # %bb.15:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB47_17
-; RV64ZVE32F-NEXT: .LBB47_9:
+; RV64ZVE32F-NEXT: j .LBB47_24
+; RV64ZVE32F-NEXT: .LBB47_16:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: andi a4, a6, 2
-; RV64ZVE32F-NEXT: beqz a4, .LBB47_2
-; RV64ZVE32F-NEXT: .LBB47_10: # %cond.load1
+; RV64ZVE32F-NEXT: j .LBB47_2
+; RV64ZVE32F-NEXT: .LBB47_17: # %cond.load1
; RV64ZVE32F-NEXT: ld a4, 8(a1)
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: andi a5, a6, 4
-; RV64ZVE32F-NEXT: beqz a5, .LBB47_3
-; RV64ZVE32F-NEXT: .LBB47_11: # %cond.load4
+; RV64ZVE32F-NEXT: j .LBB47_4
+; RV64ZVE32F-NEXT: .LBB47_18: # %cond.load4
; RV64ZVE32F-NEXT: ld a5, 16(a1)
; RV64ZVE32F-NEXT: ld a5, 0(a5)
-; RV64ZVE32F-NEXT: andi a7, a6, 8
-; RV64ZVE32F-NEXT: beqz a7, .LBB47_4
-; RV64ZVE32F-NEXT: .LBB47_12: # %cond.load7
+; RV64ZVE32F-NEXT: j .LBB47_6
+; RV64ZVE32F-NEXT: .LBB47_19: # %cond.load7
; RV64ZVE32F-NEXT: ld a7, 24(a1)
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: andi t0, a6, 16
-; RV64ZVE32F-NEXT: beqz t0, .LBB47_5
-; RV64ZVE32F-NEXT: .LBB47_13: # %cond.load10
+; RV64ZVE32F-NEXT: j .LBB47_8
+; RV64ZVE32F-NEXT: .LBB47_20: # %cond.load10
; RV64ZVE32F-NEXT: ld t0, 32(a1)
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: andi t1, a6, 32
-; RV64ZVE32F-NEXT: beqz t1, .LBB47_6
-; RV64ZVE32F-NEXT: .LBB47_14: # %cond.load13
+; RV64ZVE32F-NEXT: j .LBB47_10
+; RV64ZVE32F-NEXT: .LBB47_21: # %cond.load13
; RV64ZVE32F-NEXT: ld t1, 40(a1)
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: andi t2, a6, 64
-; RV64ZVE32F-NEXT: beqz t2, .LBB47_7
-; RV64ZVE32F-NEXT: .LBB47_15: # %cond.load16
+; RV64ZVE32F-NEXT: j .LBB47_12
+; RV64ZVE32F-NEXT: .LBB47_22: # %cond.load16
; RV64ZVE32F-NEXT: ld t2, 48(a1)
; RV64ZVE32F-NEXT: ld t2, 0(t2)
-; RV64ZVE32F-NEXT: andi a6, a6, -128
-; RV64ZVE32F-NEXT: beqz a6, .LBB47_8
-; RV64ZVE32F-NEXT: .LBB47_16: # %cond.load19
+; RV64ZVE32F-NEXT: j .LBB47_14
+; RV64ZVE32F-NEXT: .LBB47_23: # %cond.load19
; RV64ZVE32F-NEXT: ld a1, 56(a1)
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB47_17: # %else20
+; RV64ZVE32F-NEXT: .LBB47_24: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a5, 16(a0)
@@ -4111,81 +4030,81 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: andi a3, t0, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: beqz a3, .LBB48_7
+; RV32ZVE32F-NEXT: beqz a3, .LBB48_12
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
+; RV32ZVE32F-NEXT: .LBB48_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB48_8
-; RV32ZVE32F-NEXT: .LBB48_2:
+; RV32ZVE32F-NEXT: bnez a4, .LBB48_13
+; RV32ZVE32F-NEXT: # %bb.3:
; RV32ZVE32F-NEXT: lw a4, 8(a2)
; RV32ZVE32F-NEXT: lw a5, 12(a2)
+; RV32ZVE32F-NEXT: .LBB48_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB48_9
-; RV32ZVE32F-NEXT: .LBB48_3:
+; RV32ZVE32F-NEXT: bnez a6, .LBB48_14
+; RV32ZVE32F-NEXT: # %bb.5:
; RV32ZVE32F-NEXT: lw a6, 16(a2)
; RV32ZVE32F-NEXT: lw a7, 20(a2)
+; RV32ZVE32F-NEXT: .LBB48_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB48_10
-; RV32ZVE32F-NEXT: .LBB48_4:
+; RV32ZVE32F-NEXT: bnez t1, .LBB48_15
+; RV32ZVE32F-NEXT: # %bb.7:
; RV32ZVE32F-NEXT: lw t1, 24(a2)
; RV32ZVE32F-NEXT: lw t2, 28(a2)
+; RV32ZVE32F-NEXT: .LBB48_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB48_11
-; RV32ZVE32F-NEXT: .LBB48_5:
+; RV32ZVE32F-NEXT: bnez t3, .LBB48_16
+; RV32ZVE32F-NEXT: # %bb.9:
; RV32ZVE32F-NEXT: lw t3, 32(a2)
; RV32ZVE32F-NEXT: lw t4, 36(a2)
+; RV32ZVE32F-NEXT: .LBB48_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB48_12
-; RV32ZVE32F-NEXT: .LBB48_6:
+; RV32ZVE32F-NEXT: bnez t5, .LBB48_17
+; RV32ZVE32F-NEXT: # %bb.11:
; RV32ZVE32F-NEXT: lw t5, 40(a2)
; RV32ZVE32F-NEXT: lw t6, 44(a2)
-; RV32ZVE32F-NEXT: j .LBB48_13
-; RV32ZVE32F-NEXT: .LBB48_7:
+; RV32ZVE32F-NEXT: j .LBB48_18
+; RV32ZVE32F-NEXT: .LBB48_12:
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
-; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: beqz a4, .LBB48_2
-; RV32ZVE32F-NEXT: .LBB48_8: # %cond.load1
+; RV32ZVE32F-NEXT: j .LBB48_2
+; RV32ZVE32F-NEXT: .LBB48_13: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: beqz a6, .LBB48_3
-; RV32ZVE32F-NEXT: .LBB48_9: # %cond.load4
+; RV32ZVE32F-NEXT: j .LBB48_4
+; RV32ZVE32F-NEXT: .LBB48_14: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: beqz t1, .LBB48_4
-; RV32ZVE32F-NEXT: .LBB48_10: # %cond.load7
+; RV32ZVE32F-NEXT: j .LBB48_6
+; RV32ZVE32F-NEXT: .LBB48_15: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: beqz t3, .LBB48_5
-; RV32ZVE32F-NEXT: .LBB48_11: # %cond.load10
+; RV32ZVE32F-NEXT: j .LBB48_8
+; RV32ZVE32F-NEXT: .LBB48_16: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: beqz t5, .LBB48_6
-; RV32ZVE32F-NEXT: .LBB48_12: # %cond.load13
+; RV32ZVE32F-NEXT: j .LBB48_10
+; RV32ZVE32F-NEXT: .LBB48_17: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB48_13: # %else14
+; RV32ZVE32F-NEXT: .LBB48_18: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -4193,31 +4112,31 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB48_16
-; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB48_22
+; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
+; RV32ZVE32F-NEXT: .LBB48_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB48_17
-; RV32ZVE32F-NEXT: .LBB48_15:
+; RV32ZVE32F-NEXT: bnez t0, .LBB48_23
+; RV32ZVE32F-NEXT: # %bb.21:
; RV32ZVE32F-NEXT: lw t0, 56(a2)
; RV32ZVE32F-NEXT: lw a2, 60(a2)
-; RV32ZVE32F-NEXT: j .LBB48_18
-; RV32ZVE32F-NEXT: .LBB48_16:
+; RV32ZVE32F-NEXT: j .LBB48_24
+; RV32ZVE32F-NEXT: .LBB48_22:
; RV32ZVE32F-NEXT: lw s0, 48(a2)
; RV32ZVE32F-NEXT: lw s1, 52(a2)
-; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: beqz t0, .LBB48_15
-; RV32ZVE32F-NEXT: .LBB48_17: # %cond.load19
+; RV32ZVE32F-NEXT: j .LBB48_20
+; RV32ZVE32F-NEXT: .LBB48_23: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: .LBB48_18: # %else20
+; RV32ZVE32F-NEXT: .LBB48_24: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -4247,103 +4166,103 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB48_3
+; RV64ZVE32F-NEXT: beqz a3, .LBB48_4
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
+; RV64ZVE32F-NEXT: .LBB48_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB48_4
-; RV64ZVE32F-NEXT: .LBB48_2:
+; RV64ZVE32F-NEXT: bnez a4, .LBB48_5
+; RV64ZVE32F-NEXT: # %bb.3:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: j .LBB48_5
-; RV64ZVE32F-NEXT: .LBB48_3:
+; RV64ZVE32F-NEXT: j .LBB48_6
+; RV64ZVE32F-NEXT: .LBB48_4:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: beqz a4, .LBB48_2
-; RV64ZVE32F-NEXT: .LBB48_4: # %cond.load1
+; RV64ZVE32F-NEXT: j .LBB48_2
+; RV64ZVE32F-NEXT: .LBB48_5: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: .LBB48_5: # %else2
+; RV64ZVE32F-NEXT: .LBB48_6: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB48_10
-; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a6, .LBB48_14
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
+; RV64ZVE32F-NEXT: .LBB48_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB48_11
-; RV64ZVE32F-NEXT: .LBB48_7:
+; RV64ZVE32F-NEXT: bnez a7, .LBB48_15
+; RV64ZVE32F-NEXT: # %bb.9:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: .LBB48_10:
; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB48_12
-; RV64ZVE32F-NEXT: .LBB48_8:
+; RV64ZVE32F-NEXT: bnez t0, .LBB48_16
+; RV64ZVE32F-NEXT: # %bb.11:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
+; RV64ZVE32F-NEXT: .LBB48_12:
; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB48_13
-; RV64ZVE32F-NEXT: .LBB48_9:
+; RV64ZVE32F-NEXT: bnez t1, .LBB48_17
+; RV64ZVE32F-NEXT: # %bb.13:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB48_14
-; RV64ZVE32F-NEXT: .LBB48_10:
+; RV64ZVE32F-NEXT: j .LBB48_18
+; RV64ZVE32F-NEXT: .LBB48_14:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: beqz a7, .LBB48_7
-; RV64ZVE32F-NEXT: .LBB48_11: # %cond.load7
+; RV64ZVE32F-NEXT: j .LBB48_8
+; RV64ZVE32F-NEXT: .LBB48_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: beqz t0, .LBB48_8
-; RV64ZVE32F-NEXT: .LBB48_12: # %cond.load10
+; RV64ZVE32F-NEXT: j .LBB48_10
+; RV64ZVE32F-NEXT: .LBB48_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v9
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: beqz t1, .LBB48_9
-; RV64ZVE32F-NEXT: .LBB48_13: # %cond.load13
+; RV64ZVE32F-NEXT: j .LBB48_12
+; RV64ZVE32F-NEXT: .LBB48_17: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: .LBB48_14: # %else14
+; RV64ZVE32F-NEXT: .LBB48_18: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: beqz t2, .LBB48_17
-; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
+; RV64ZVE32F-NEXT: beqz t2, .LBB48_22
+; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
+; RV64ZVE32F-NEXT: .LBB48_20: # %cond.load16
; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB48_18
-; RV64ZVE32F-NEXT: .LBB48_16:
+; RV64ZVE32F-NEXT: bnez a5, .LBB48_23
+; RV64ZVE32F-NEXT: # %bb.21:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB48_19
-; RV64ZVE32F-NEXT: .LBB48_17:
+; RV64ZVE32F-NEXT: j .LBB48_24
+; RV64ZVE32F-NEXT: .LBB48_22:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
-; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: beqz a5, .LBB48_16
-; RV64ZVE32F-NEXT: .LBB48_18: # %cond.load19
+; RV64ZVE32F-NEXT: j .LBB48_20
+; RV64ZVE32F-NEXT: .LBB48_23: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB48_19: # %else20
+; RV64ZVE32F-NEXT: .LBB48_24: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -4388,81 +4307,81 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: andi a3, t0, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: beqz a3, .LBB49_7
+; RV32ZVE32F-NEXT: beqz a3, .LBB49_12
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
+; RV32ZVE32F-NEXT: .LBB49_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB49_8
-; RV32ZVE32F-NEXT: .LBB49_2:
+; RV32ZVE32F-NEXT: bnez a4, .LBB49_13
+; RV32ZVE32F-NEXT: # %bb.3:
; RV32ZVE32F-NEXT: lw a4, 8(a2)
; RV32ZVE32F-NEXT: lw a5, 12(a2)
+; RV32ZVE32F-NEXT: .LBB49_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB49_9
-; RV32ZVE32F-NEXT: .LBB49_3:
+; RV32ZVE32F-NEXT: bnez a6, .LBB49_14
+; RV32ZVE32F-NEXT: # %bb.5:
; RV32ZVE32F-NEXT: lw a6, 16(a2)
; RV32ZVE32F-NEXT: lw a7, 20(a2)
+; RV32ZVE32F-NEXT: .LBB49_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB49_10
-; RV32ZVE32F-NEXT: .LBB49_4:
+; RV32ZVE32F-NEXT: bnez t1, .LBB49_15
+; RV32ZVE32F-NEXT: # %bb.7:
; RV32ZVE32F-NEXT: lw t1, 24(a2)
; RV32ZVE32F-NEXT: lw t2, 28(a2)
+; RV32ZVE32F-NEXT: .LBB49_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB49_11
-; RV32ZVE32F-NEXT: .LBB49_5:
+; RV32ZVE32F-NEXT: bnez t3, .LBB49_16
+; RV32ZVE32F-NEXT: # %bb.9:
; RV32ZVE32F-NEXT: lw t3, 32(a2)
; RV32ZVE32F-NEXT: lw t4, 36(a2)
+; RV32ZVE32F-NEXT: .LBB49_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB49_12
-; RV32ZVE32F-NEXT: .LBB49_6:
+; RV32ZVE32F-NEXT: bnez t5, .LBB49_17
+; RV32ZVE32F-NEXT: # %bb.11:
; RV32ZVE32F-NEXT: lw t5, 40(a2)
; RV32ZVE32F-NEXT: lw t6, 44(a2)
-; RV32ZVE32F-NEXT: j .LBB49_13
-; RV32ZVE32F-NEXT: .LBB49_7:
+; RV32ZVE32F-NEXT: j .LBB49_18
+; RV32ZVE32F-NEXT: .LBB49_12:
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
-; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: beqz a4, .LBB49_2
-; RV32ZVE32F-NEXT: .LBB49_8: # %cond.load1
+; RV32ZVE32F-NEXT: j .LBB49_2
+; RV32ZVE32F-NEXT: .LBB49_13: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: beqz a6, .LBB49_3
-; RV32ZVE32F-NEXT: .LBB49_9: # %cond.load4
+; RV32ZVE32F-NEXT: j .LBB49_4
+; RV32ZVE32F-NEXT: .LBB49_14: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: beqz t1, .LBB49_4
-; RV32ZVE32F-NEXT: .LBB49_10: # %cond.load7
+; RV32ZVE32F-NEXT: j .LBB49_6
+; RV32ZVE32F-NEXT: .LBB49_15: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: beqz t3, .LBB49_5
-; RV32ZVE32F-NEXT: .LBB49_11: # %cond.load10
+; RV32ZVE32F-NEXT: j .LBB49_8
+; RV32ZVE32F-NEXT: .LBB49_16: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: beqz t5, .LBB49_6
-; RV32ZVE32F-NEXT: .LBB49_12: # %cond.load13
+; RV32ZVE32F-NEXT: j .LBB49_10
+; RV32ZVE32F-NEXT: .LBB49_17: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB49_13: # %else14
+; RV32ZVE32F-NEXT: .LBB49_18: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -4470,31 +4389,31 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB49_16
-; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB49_22
+; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
+; RV32ZVE32F-NEXT: .LBB49_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB49_17
-; RV32ZVE32F-NEXT: .LBB49_15:
+; RV32ZVE32F-NEXT: bnez t0, .LBB49_23
+; RV32ZVE32F-NEXT: # %bb.21:
; RV32ZVE32F-NEXT: lw t0, 56(a2)
; RV32ZVE32F-NEXT: lw a2, 60(a2)
-; RV32ZVE32F-NEXT: j .LBB49_18
-; RV32ZVE32F-NEXT: .LBB49_16:
+; RV32ZVE32F-NEXT: j .LBB49_24
+; RV32ZVE32F-NEXT: .LBB49_22:
; RV32ZVE32F-NEXT: lw s0, 48(a2)
; RV32ZVE32F-NEXT: lw s1, 52(a2)
-; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: beqz t0, .LBB49_15
-; RV32ZVE32F-NEXT: .LBB49_17: # %cond.load19
+; RV32ZVE32F-NEXT: j .LBB49_20
+; RV32ZVE32F-NEXT: .LBB49_23: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: .LBB49_18: # %else20
+; RV32ZVE32F-NEXT: .LBB49_24: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -4524,103 +4443,103 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB49_3
+; RV64ZVE32F-NEXT: beqz a3, .LBB49_4
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
+; RV64ZVE32F-NEXT: .LBB49_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB49_4
-; RV64ZVE32F-NEXT: .LBB49_2:
+; RV64ZVE32F-NEXT: bnez a4, .LBB49_5
+; RV64ZVE32F-NEXT: # %bb.3:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: j .LBB49_5
-; RV64ZVE32F-NEXT: .LBB49_3:
+; RV64ZVE32F-NEXT: j .LBB49_6
+; RV64ZVE32F-NEXT: .LBB49_4:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: beqz a4, .LBB49_2
-; RV64ZVE32F-NEXT: .LBB49_4: # %cond.load1
+; RV64ZVE32F-NEXT: j .LBB49_2
+; RV64ZVE32F-NEXT: .LBB49_5: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: .LBB49_5: # %else2
+; RV64ZVE32F-NEXT: .LBB49_6: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB49_10
-; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a6, .LBB49_14
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
+; RV64ZVE32F-NEXT: .LBB49_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB49_11
-; RV64ZVE32F-NEXT: .LBB49_7:
+; RV64ZVE32F-NEXT: bnez a7, .LBB49_15
+; RV64ZVE32F-NEXT: # %bb.9:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: .LBB49_10:
; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB49_12
-; RV64ZVE32F-NEXT: .LBB49_8:
+; RV64ZVE32F-NEXT: bnez t0, .LBB49_16
+; RV64ZVE32F-NEXT: # %bb.11:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
+; RV64ZVE32F-NEXT: .LBB49_12:
; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB49_13
-; RV64ZVE32F-NEXT: .LBB49_9:
+; RV64ZVE32F-NEXT: bnez t1, .LBB49_17
+; RV64ZVE32F-NEXT: # %bb.13:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB49_14
-; RV64ZVE32F-NEXT: .LBB49_10:
+; RV64ZVE32F-NEXT: j .LBB49_18
+; RV64ZVE32F-NEXT: .LBB49_14:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: beqz a7, .LBB49_7
-; RV64ZVE32F-NEXT: .LBB49_11: # %cond.load7
+; RV64ZVE32F-NEXT: j .LBB49_8
+; RV64ZVE32F-NEXT: .LBB49_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: beqz t0, .LBB49_8
-; RV64ZVE32F-NEXT: .LBB49_12: # %cond.load10
+; RV64ZVE32F-NEXT: j .LBB49_10
+; RV64ZVE32F-NEXT: .LBB49_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v9
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: beqz t1, .LBB49_9
-; RV64ZVE32F-NEXT: .LBB49_13: # %cond.load13
+; RV64ZVE32F-NEXT: j .LBB49_12
+; RV64ZVE32F-NEXT: .LBB49_17: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: .LBB49_14: # %else14
+; RV64ZVE32F-NEXT: .LBB49_18: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: beqz t2, .LBB49_17
-; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
+; RV64ZVE32F-NEXT: beqz t2, .LBB49_22
+; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
+; RV64ZVE32F-NEXT: .LBB49_20: # %cond.load16
; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB49_18
-; RV64ZVE32F-NEXT: .LBB49_16:
+; RV64ZVE32F-NEXT: bnez a5, .LBB49_23
+; RV64ZVE32F-NEXT: # %bb.21:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB49_19
-; RV64ZVE32F-NEXT: .LBB49_17:
+; RV64ZVE32F-NEXT: j .LBB49_24
+; RV64ZVE32F-NEXT: .LBB49_22:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
-; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: beqz a5, .LBB49_16
-; RV64ZVE32F-NEXT: .LBB49_18: # %cond.load19
+; RV64ZVE32F-NEXT: j .LBB49_20
+; RV64ZVE32F-NEXT: .LBB49_23: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB49_19: # %else20
+; RV64ZVE32F-NEXT: .LBB49_24: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -4667,81 +4586,81 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: andi a3, t0, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: beqz a3, .LBB50_7
+; RV32ZVE32F-NEXT: beqz a3, .LBB50_12
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
+; RV32ZVE32F-NEXT: .LBB50_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB50_8
-; RV32ZVE32F-NEXT: .LBB50_2:
+; RV32ZVE32F-NEXT: bnez a4, .LBB50_13
+; RV32ZVE32F-NEXT: # %bb.3:
; RV32ZVE32F-NEXT: lw a4, 8(a2)
; RV32ZVE32F-NEXT: lw a5, 12(a2)
+; RV32ZVE32F-NEXT: .LBB50_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB50_9
-; RV32ZVE32F-NEXT: .LBB50_3:
+; RV32ZVE32F-NEXT: bnez a6, .LBB50_14
+; RV32ZVE32F-NEXT: # %bb.5:
; RV32ZVE32F-NEXT: lw a6, 16(a2)
; RV32ZVE32F-NEXT: lw a7, 20(a2)
+; RV32ZVE32F-NEXT: .LBB50_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB50_10
-; RV32ZVE32F-NEXT: .LBB50_4:
+; RV32ZVE32F-NEXT: bnez t1, .LBB50_15
+; RV32ZVE32F-NEXT: # %bb.7:
; RV32ZVE32F-NEXT: lw t1, 24(a2)
; RV32ZVE32F-NEXT: lw t2, 28(a2)
+; RV32ZVE32F-NEXT: .LBB50_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB50_11
-; RV32ZVE32F-NEXT: .LBB50_5:
+; RV32ZVE32F-NEXT: bnez t3, .LBB50_16
+; RV32ZVE32F-NEXT: # %bb.9:
; RV32ZVE32F-NEXT: lw t3, 32(a2)
; RV32ZVE32F-NEXT: lw t4, 36(a2)
+; RV32ZVE32F-NEXT: .LBB50_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB50_12
-; RV32ZVE32F-NEXT: .LBB50_6:
+; RV32ZVE32F-NEXT: bnez t5, .LBB50_17
+; RV32ZVE32F-NEXT: # %bb.11:
; RV32ZVE32F-NEXT: lw t5, 40(a2)
; RV32ZVE32F-NEXT: lw t6, 44(a2)
-; RV32ZVE32F-NEXT: j .LBB50_13
-; RV32ZVE32F-NEXT: .LBB50_7:
+; RV32ZVE32F-NEXT: j .LBB50_18
+; RV32ZVE32F-NEXT: .LBB50_12:
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
-; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: beqz a4, .LBB50_2
-; RV32ZVE32F-NEXT: .LBB50_8: # %cond.load1
+; RV32ZVE32F-NEXT: j .LBB50_2
+; RV32ZVE32F-NEXT: .LBB50_13: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: beqz a6, .LBB50_3
-; RV32ZVE32F-NEXT: .LBB50_9: # %cond.load4
+; RV32ZVE32F-NEXT: j .LBB50_4
+; RV32ZVE32F-NEXT: .LBB50_14: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: beqz t1, .LBB50_4
-; RV32ZVE32F-NEXT: .LBB50_10: # %cond.load7
+; RV32ZVE32F-NEXT: j .LBB50_6
+; RV32ZVE32F-NEXT: .LBB50_15: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: beqz t3, .LBB50_5
-; RV32ZVE32F-NEXT: .LBB50_11: # %cond.load10
+; RV32ZVE32F-NEXT: j .LBB50_8
+; RV32ZVE32F-NEXT: .LBB50_16: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: beqz t5, .LBB50_6
-; RV32ZVE32F-NEXT: .LBB50_12: # %cond.load13
+; RV32ZVE32F-NEXT: j .LBB50_10
+; RV32ZVE32F-NEXT: .LBB50_17: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB50_13: # %else14
+; RV32ZVE32F-NEXT: .LBB50_18: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -4749,31 +4668,31 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB50_16
-; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB50_22
+; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
+; RV32ZVE32F-NEXT: .LBB50_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB50_17
-; RV32ZVE32F-NEXT: .LBB50_15:
+; RV32ZVE32F-NEXT: bnez t0, .LBB50_23
+; RV32ZVE32F-NEXT: # %bb.21:
; RV32ZVE32F-NEXT: lw t0, 56(a2)
; RV32ZVE32F-NEXT: lw a2, 60(a2)
-; RV32ZVE32F-NEXT: j .LBB50_18
-; RV32ZVE32F-NEXT: .LBB50_16:
+; RV32ZVE32F-NEXT: j .LBB50_24
+; RV32ZVE32F-NEXT: .LBB50_22:
; RV32ZVE32F-NEXT: lw s0, 48(a2)
; RV32ZVE32F-NEXT: lw s1, 52(a2)
-; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: beqz t0, .LBB50_15
-; RV32ZVE32F-NEXT: .LBB50_17: # %cond.load19
+; RV32ZVE32F-NEXT: j .LBB50_20
+; RV32ZVE32F-NEXT: .LBB50_23: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: .LBB50_18: # %else20
+; RV32ZVE32F-NEXT: .LBB50_24: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -4803,23 +4722,23 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB50_3
+; RV64ZVE32F-NEXT: beqz a3, .LBB50_4
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
+; RV64ZVE32F-NEXT: .LBB50_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB50_4
-; RV64ZVE32F-NEXT: .LBB50_2:
+; RV64ZVE32F-NEXT: bnez a4, .LBB50_5
+; RV64ZVE32F-NEXT: # %bb.3:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: j .LBB50_5
-; RV64ZVE32F-NEXT: .LBB50_3:
+; RV64ZVE32F-NEXT: j .LBB50_6
+; RV64ZVE32F-NEXT: .LBB50_4:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: beqz a4, .LBB50_2
-; RV64ZVE32F-NEXT: .LBB50_4: # %cond.load1
+; RV64ZVE32F-NEXT: j .LBB50_2
+; RV64ZVE32F-NEXT: .LBB50_5: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
@@ -4827,87 +4746,87 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: .LBB50_5: # %else2
+; RV64ZVE32F-NEXT: .LBB50_6: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB50_10
-; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a6, .LBB50_14
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: andi a6, a6, 255
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
+; RV64ZVE32F-NEXT: .LBB50_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB50_11
-; RV64ZVE32F-NEXT: .LBB50_7:
+; RV64ZVE32F-NEXT: bnez a7, .LBB50_15
+; RV64ZVE32F-NEXT: # %bb.9:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: .LBB50_10:
; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB50_12
-; RV64ZVE32F-NEXT: .LBB50_8:
+; RV64ZVE32F-NEXT: bnez t0, .LBB50_16
+; RV64ZVE32F-NEXT: # %bb.11:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
+; RV64ZVE32F-NEXT: .LBB50_12:
; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB50_13
-; RV64ZVE32F-NEXT: .LBB50_9:
+; RV64ZVE32F-NEXT: bnez t1, .LBB50_17
+; RV64ZVE32F-NEXT: # %bb.13:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB50_14
-; RV64ZVE32F-NEXT: .LBB50_10:
+; RV64ZVE32F-NEXT: j .LBB50_18
+; RV64ZVE32F-NEXT: .LBB50_14:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: beqz a7, .LBB50_7
-; RV64ZVE32F-NEXT: .LBB50_11: # %cond.load7
+; RV64ZVE32F-NEXT: j .LBB50_8
+; RV64ZVE32F-NEXT: .LBB50_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: andi a7, a7, 255
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: beqz t0, .LBB50_8
-; RV64ZVE32F-NEXT: .LBB50_12: # %cond.load10
+; RV64ZVE32F-NEXT: j .LBB50_10
+; RV64ZVE32F-NEXT: .LBB50_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v9
; RV64ZVE32F-NEXT: andi t0, t0, 255
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: beqz t1, .LBB50_9
-; RV64ZVE32F-NEXT: .LBB50_13: # %cond.load13
+; RV64ZVE32F-NEXT: j .LBB50_12
+; RV64ZVE32F-NEXT: .LBB50_17: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: andi t1, t1, 255
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: .LBB50_14: # %else14
+; RV64ZVE32F-NEXT: .LBB50_18: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: beqz t2, .LBB50_17
-; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
+; RV64ZVE32F-NEXT: beqz t2, .LBB50_22
+; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: andi t2, t2, 255
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
+; RV64ZVE32F-NEXT: .LBB50_20: # %cond.load16
; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB50_18
-; RV64ZVE32F-NEXT: .LBB50_16:
+; RV64ZVE32F-NEXT: bnez a5, .LBB50_23
+; RV64ZVE32F-NEXT: # %bb.21:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB50_19
-; RV64ZVE32F-NEXT: .LBB50_17:
+; RV64ZVE32F-NEXT: j .LBB50_24
+; RV64ZVE32F-NEXT: .LBB50_22:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
-; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: beqz a5, .LBB50_16
-; RV64ZVE32F-NEXT: .LBB50_18: # %cond.load19
+; RV64ZVE32F-NEXT: j .LBB50_20
+; RV64ZVE32F-NEXT: .LBB50_23: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB50_19: # %else20
+; RV64ZVE32F-NEXT: .LBB50_24: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -4953,82 +4872,82 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i
; RV32ZVE32F-NEXT: andi a3, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccus.vx v10, a1, v8
-; RV32ZVE32F-NEXT: beqz a3, .LBB51_7
+; RV32ZVE32F-NEXT: beqz a3, .LBB51_12
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v10
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
+; RV32ZVE32F-NEXT: .LBB51_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB51_8
-; RV32ZVE32F-NEXT: .LBB51_2:
+; RV32ZVE32F-NEXT: bnez a4, .LBB51_13
+; RV32ZVE32F-NEXT: # %bb.3:
; RV32ZVE32F-NEXT: lw a4, 8(a2)
; RV32ZVE32F-NEXT: lw a5, 12(a2)
+; RV32ZVE32F-NEXT: .LBB51_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB51_9
-; RV32ZVE32F-NEXT: .LBB51_3:
+; RV32ZVE32F-NEXT: bnez a6, .LBB51_14
+; RV32ZVE32F-NEXT: # %bb.5:
; RV32ZVE32F-NEXT: lw a6, 16(a2)
; RV32ZVE32F-NEXT: lw a7, 20(a2)
+; RV32ZVE32F-NEXT: .LBB51_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB51_10
-; RV32ZVE32F-NEXT: .LBB51_4:
+; RV32ZVE32F-NEXT: bnez t1, .LBB51_15
+; RV32ZVE32F-NEXT: # %bb.7:
; RV32ZVE32F-NEXT: lw t1, 24(a2)
; RV32ZVE32F-NEXT: lw t2, 28(a2)
+; RV32ZVE32F-NEXT: .LBB51_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB51_11
-; RV32ZVE32F-NEXT: .LBB51_5:
+; RV32ZVE32F-NEXT: bnez t3, .LBB51_16
+; RV32ZVE32F-NEXT: # %bb.9:
; RV32ZVE32F-NEXT: lw t3, 32(a2)
; RV32ZVE32F-NEXT: lw t4, 36(a2)
+; RV32ZVE32F-NEXT: .LBB51_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB51_12
-; RV32ZVE32F-NEXT: .LBB51_6:
+; RV32ZVE32F-NEXT: bnez t5, .LBB51_17
+; RV32ZVE32F-NEXT: # %bb.11:
; RV32ZVE32F-NEXT: lw t5, 40(a2)
; RV32ZVE32F-NEXT: lw t6, 44(a2)
-; RV32ZVE32F-NEXT: j .LBB51_13
-; RV32ZVE32F-NEXT: .LBB51_7:
+; RV32ZVE32F-NEXT: j .LBB51_18
+; RV32ZVE32F-NEXT: .LBB51_12:
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
-; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: beqz a4, .LBB51_2
-; RV32ZVE32F-NEXT: .LBB51_8: # %cond.load1
+; RV32ZVE32F-NEXT: j .LBB51_2
+; RV32ZVE32F-NEXT: .LBB51_13: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v8
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: beqz a6, .LBB51_3
-; RV32ZVE32F-NEXT: .LBB51_9: # %cond.load4
+; RV32ZVE32F-NEXT: j .LBB51_4
+; RV32ZVE32F-NEXT: .LBB51_14: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v8
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: beqz t1, .LBB51_4
-; RV32ZVE32F-NEXT: .LBB51_10: # %cond.load7
+; RV32ZVE32F-NEXT: j .LBB51_6
+; RV32ZVE32F-NEXT: .LBB51_15: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v8
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: beqz t3, .LBB51_5
-; RV32ZVE32F-NEXT: .LBB51_11: # %cond.load10
+; RV32ZVE32F-NEXT: j .LBB51_8
+; RV32ZVE32F-NEXT: .LBB51_16: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v8
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: beqz t5, .LBB51_6
-; RV32ZVE32F-NEXT: .LBB51_12: # %cond.load13
+; RV32ZVE32F-NEXT: j .LBB51_10
+; RV32ZVE32F-NEXT: .LBB51_17: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v8
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB51_13: # %else14
+; RV32ZVE32F-NEXT: .LBB51_18: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -5036,31 +4955,31 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB51_16
-; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB51_22
+; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v8
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
+; RV32ZVE32F-NEXT: .LBB51_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB51_17
-; RV32ZVE32F-NEXT: .LBB51_15:
+; RV32ZVE32F-NEXT: bnez t0, .LBB51_23
+; RV32ZVE32F-NEXT: # %bb.21:
; RV32ZVE32F-NEXT: lw t0, 56(a2)
; RV32ZVE32F-NEXT: lw a2, 60(a2)
-; RV32ZVE32F-NEXT: j .LBB51_18
-; RV32ZVE32F-NEXT: .LBB51_16:
+; RV32ZVE32F-NEXT: j .LBB51_24
+; RV32ZVE32F-NEXT: .LBB51_22:
; RV32ZVE32F-NEXT: lw s0, 48(a2)
; RV32ZVE32F-NEXT: lw s1, 52(a2)
-; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: beqz t0, .LBB51_15
-; RV32ZVE32F-NEXT: .LBB51_17: # %cond.load19
+; RV32ZVE32F-NEXT: j .LBB51_20
+; RV32ZVE32F-NEXT: .LBB51_23: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: .LBB51_18: # %else20
+; RV32ZVE32F-NEXT: .LBB51_24: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -5090,104 +5009,104 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB51_3
+; RV64ZVE32F-NEXT: beqz a3, .LBB51_4
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
+; RV64ZVE32F-NEXT: .LBB51_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB51_4
-; RV64ZVE32F-NEXT: .LBB51_2:
+; RV64ZVE32F-NEXT: bnez a4, .LBB51_5
+; RV64ZVE32F-NEXT: # %bb.3:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: j .LBB51_5
-; RV64ZVE32F-NEXT: .LBB51_3:
+; RV64ZVE32F-NEXT: j .LBB51_6
+; RV64ZVE32F-NEXT: .LBB51_4:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: beqz a4, .LBB51_2
-; RV64ZVE32F-NEXT: .LBB51_4: # %cond.load1
+; RV64ZVE32F-NEXT: j .LBB51_2
+; RV64ZVE32F-NEXT: .LBB51_5: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: .LBB51_5: # %else2
+; RV64ZVE32F-NEXT: .LBB51_6: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB51_10
-; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a6, .LBB51_14
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
+; RV64ZVE32F-NEXT: .LBB51_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB51_11
-; RV64ZVE32F-NEXT: .LBB51_7:
+; RV64ZVE32F-NEXT: bnez a7, .LBB51_15
+; RV64ZVE32F-NEXT: # %bb.9:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: .LBB51_10:
; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB51_12
-; RV64ZVE32F-NEXT: .LBB51_8:
+; RV64ZVE32F-NEXT: bnez t0, .LBB51_16
+; RV64ZVE32F-NEXT: # %bb.11:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
+; RV64ZVE32F-NEXT: .LBB51_12:
; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB51_13
-; RV64ZVE32F-NEXT: .LBB51_9:
+; RV64ZVE32F-NEXT: bnez t1, .LBB51_17
+; RV64ZVE32F-NEXT: # %bb.13:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB51_14
-; RV64ZVE32F-NEXT: .LBB51_10:
+; RV64ZVE32F-NEXT: j .LBB51_18
+; RV64ZVE32F-NEXT: .LBB51_14:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: beqz a7, .LBB51_7
-; RV64ZVE32F-NEXT: .LBB51_11: # %cond.load7
+; RV64ZVE32F-NEXT: j .LBB51_8
+; RV64ZVE32F-NEXT: .LBB51_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: beqz t0, .LBB51_8
-; RV64ZVE32F-NEXT: .LBB51_12: # %cond.load10
+; RV64ZVE32F-NEXT: j .LBB51_10
+; RV64ZVE32F-NEXT: .LBB51_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v9
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: beqz t1, .LBB51_9
-; RV64ZVE32F-NEXT: .LBB51_13: # %cond.load13
+; RV64ZVE32F-NEXT: j .LBB51_12
+; RV64ZVE32F-NEXT: .LBB51_17: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: .LBB51_14: # %else14
+; RV64ZVE32F-NEXT: .LBB51_18: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: beqz t2, .LBB51_17
-; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
+; RV64ZVE32F-NEXT: beqz t2, .LBB51_22
+; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
+; RV64ZVE32F-NEXT: .LBB51_20: # %cond.load16
; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB51_18
-; RV64ZVE32F-NEXT: .LBB51_16:
+; RV64ZVE32F-NEXT: bnez a5, .LBB51_23
+; RV64ZVE32F-NEXT: # %bb.21:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB51_19
-; RV64ZVE32F-NEXT: .LBB51_17:
+; RV64ZVE32F-NEXT: j .LBB51_24
+; RV64ZVE32F-NEXT: .LBB51_22:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
-; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: beqz a5, .LBB51_16
-; RV64ZVE32F-NEXT: .LBB51_18: # %cond.load19
+; RV64ZVE32F-NEXT: j .LBB51_20
+; RV64ZVE32F-NEXT: .LBB51_23: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB51_19: # %else20
+; RV64ZVE32F-NEXT: .LBB51_24: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -5232,82 +5151,82 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV32ZVE32F-NEXT: andi a3, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccus.vx v10, a1, v8
-; RV32ZVE32F-NEXT: beqz a3, .LBB52_7
+; RV32ZVE32F-NEXT: beqz a3, .LBB52_12
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v10
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
+; RV32ZVE32F-NEXT: .LBB52_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB52_8
-; RV32ZVE32F-NEXT: .LBB52_2:
+; RV32ZVE32F-NEXT: bnez a4, .LBB52_13
+; RV32ZVE32F-NEXT: # %bb.3:
; RV32ZVE32F-NEXT: lw a4, 8(a2)
; RV32ZVE32F-NEXT: lw a5, 12(a2)
+; RV32ZVE32F-NEXT: .LBB52_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB52_9
-; RV32ZVE32F-NEXT: .LBB52_3:
+; RV32ZVE32F-NEXT: bnez a6, .LBB52_14
+; RV32ZVE32F-NEXT: # %bb.5:
; RV32ZVE32F-NEXT: lw a6, 16(a2)
; RV32ZVE32F-NEXT: lw a7, 20(a2)
+; RV32ZVE32F-NEXT: .LBB52_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB52_10
-; RV32ZVE32F-NEXT: .LBB52_4:
+; RV32ZVE32F-NEXT: bnez t1, .LBB52_15
+; RV32ZVE32F-NEXT: # %bb.7:
; RV32ZVE32F-NEXT: lw t1, 24(a2)
; RV32ZVE32F-NEXT: lw t2, 28(a2)
+; RV32ZVE32F-NEXT: .LBB52_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB52_11
-; RV32ZVE32F-NEXT: .LBB52_5:
+; RV32ZVE32F-NEXT: bnez t3, .LBB52_16
+; RV32ZVE32F-NEXT: # %bb.9:
; RV32ZVE32F-NEXT: lw t3, 32(a2)
; RV32ZVE32F-NEXT: lw t4, 36(a2)
+; RV32ZVE32F-NEXT: .LBB52_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB52_12
-; RV32ZVE32F-NEXT: .LBB52_6:
+; RV32ZVE32F-NEXT: bnez t5, .LBB52_17
+; RV32ZVE32F-NEXT: # %bb.11:
; RV32ZVE32F-NEXT: lw t5, 40(a2)
; RV32ZVE32F-NEXT: lw t6, 44(a2)
-; RV32ZVE32F-NEXT: j .LBB52_13
-; RV32ZVE32F-NEXT: .LBB52_7:
+; RV32ZVE32F-NEXT: j .LBB52_18
+; RV32ZVE32F-NEXT: .LBB52_12:
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
-; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: beqz a4, .LBB52_2
-; RV32ZVE32F-NEXT: .LBB52_8: # %cond.load1
+; RV32ZVE32F-NEXT: j .LBB52_2
+; RV32ZVE32F-NEXT: .LBB52_13: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v8
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: beqz a6, .LBB52_3
-; RV32ZVE32F-NEXT: .LBB52_9: # %cond.load4
+; RV32ZVE32F-NEXT: j .LBB52_4
+; RV32ZVE32F-NEXT: .LBB52_14: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v8
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: beqz t1, .LBB52_4
-; RV32ZVE32F-NEXT: .LBB52_10: # %cond.load7
+; RV32ZVE32F-NEXT: j .LBB52_6
+; RV32ZVE32F-NEXT: .LBB52_15: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v8
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: beqz t3, .LBB52_5
-; RV32ZVE32F-NEXT: .LBB52_11: # %cond.load10
+; RV32ZVE32F-NEXT: j .LBB52_8
+; RV32ZVE32F-NEXT: .LBB52_16: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v8
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: beqz t5, .LBB52_6
-; RV32ZVE32F-NEXT: .LBB52_12: # %cond.load13
+; RV32ZVE32F-NEXT: j .LBB52_10
+; RV32ZVE32F-NEXT: .LBB52_17: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v8
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB52_13: # %else14
+; RV32ZVE32F-NEXT: .LBB52_18: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -5315,31 +5234,31 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB52_16
-; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB52_22
+; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v8
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
+; RV32ZVE32F-NEXT: .LBB52_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB52_17
-; RV32ZVE32F-NEXT: .LBB52_15:
+; RV32ZVE32F-NEXT: bnez t0, .LBB52_23
+; RV32ZVE32F-NEXT: # %bb.21:
; RV32ZVE32F-NEXT: lw t0, 56(a2)
; RV32ZVE32F-NEXT: lw a2, 60(a2)
-; RV32ZVE32F-NEXT: j .LBB52_18
-; RV32ZVE32F-NEXT: .LBB52_16:
+; RV32ZVE32F-NEXT: j .LBB52_24
+; RV32ZVE32F-NEXT: .LBB52_22:
; RV32ZVE32F-NEXT: lw s0, 48(a2)
; RV32ZVE32F-NEXT: lw s1, 52(a2)
-; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: beqz t0, .LBB52_15
-; RV32ZVE32F-NEXT: .LBB52_17: # %cond.load19
+; RV32ZVE32F-NEXT: j .LBB52_20
+; RV32ZVE32F-NEXT: .LBB52_23: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: .LBB52_18: # %else20
+; RV32ZVE32F-NEXT: .LBB52_24: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -5369,104 +5288,104 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB52_3
+; RV64ZVE32F-NEXT: beqz a3, .LBB52_4
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
+; RV64ZVE32F-NEXT: .LBB52_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB52_4
-; RV64ZVE32F-NEXT: .LBB52_2:
+; RV64ZVE32F-NEXT: bnez a4, .LBB52_5
+; RV64ZVE32F-NEXT: # %bb.3:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: j .LBB52_5
-; RV64ZVE32F-NEXT: .LBB52_3:
+; RV64ZVE32F-NEXT: j .LBB52_6
+; RV64ZVE32F-NEXT: .LBB52_4:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: beqz a4, .LBB52_2
-; RV64ZVE32F-NEXT: .LBB52_4: # %cond.load1
+; RV64ZVE32F-NEXT: j .LBB52_2
+; RV64ZVE32F-NEXT: .LBB52_5: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: .LBB52_5: # %else2
+; RV64ZVE32F-NEXT: .LBB52_6: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB52_10
-; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a6, .LBB52_14
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
+; RV64ZVE32F-NEXT: .LBB52_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB52_11
-; RV64ZVE32F-NEXT: .LBB52_7:
+; RV64ZVE32F-NEXT: bnez a7, .LBB52_15
+; RV64ZVE32F-NEXT: # %bb.9:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: .LBB52_10:
; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB52_12
-; RV64ZVE32F-NEXT: .LBB52_8:
+; RV64ZVE32F-NEXT: bnez t0, .LBB52_16
+; RV64ZVE32F-NEXT: # %bb.11:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
+; RV64ZVE32F-NEXT: .LBB52_12:
; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB52_13
-; RV64ZVE32F-NEXT: .LBB52_9:
+; RV64ZVE32F-NEXT: bnez t1, .LBB52_17
+; RV64ZVE32F-NEXT: # %bb.13:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB52_14
-; RV64ZVE32F-NEXT: .LBB52_10:
+; RV64ZVE32F-NEXT: j .LBB52_18
+; RV64ZVE32F-NEXT: .LBB52_14:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: beqz a7, .LBB52_7
-; RV64ZVE32F-NEXT: .LBB52_11: # %cond.load7
+; RV64ZVE32F-NEXT: j .LBB52_8
+; RV64ZVE32F-NEXT: .LBB52_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: beqz t0, .LBB52_8
-; RV64ZVE32F-NEXT: .LBB52_12: # %cond.load10
+; RV64ZVE32F-NEXT: j .LBB52_10
+; RV64ZVE32F-NEXT: .LBB52_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v9
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: beqz t1, .LBB52_9
-; RV64ZVE32F-NEXT: .LBB52_13: # %cond.load13
+; RV64ZVE32F-NEXT: j .LBB52_12
+; RV64ZVE32F-NEXT: .LBB52_17: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: .LBB52_14: # %else14
+; RV64ZVE32F-NEXT: .LBB52_18: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: beqz t2, .LBB52_17
-; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
+; RV64ZVE32F-NEXT: beqz t2, .LBB52_22
+; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
+; RV64ZVE32F-NEXT: .LBB52_20: # %cond.load16
; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB52_18
-; RV64ZVE32F-NEXT: .LBB52_16:
+; RV64ZVE32F-NEXT: bnez a5, .LBB52_23
+; RV64ZVE32F-NEXT: # %bb.21:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB52_19
-; RV64ZVE32F-NEXT: .LBB52_17:
+; RV64ZVE32F-NEXT: j .LBB52_24
+; RV64ZVE32F-NEXT: .LBB52_22:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
-; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: beqz a5, .LBB52_16
-; RV64ZVE32F-NEXT: .LBB52_18: # %cond.load19
+; RV64ZVE32F-NEXT: j .LBB52_20
+; RV64ZVE32F-NEXT: .LBB52_23: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB52_19: # %else20
+; RV64ZVE32F-NEXT: .LBB52_24: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -5513,82 +5432,82 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV32ZVE32F-NEXT: andi a3, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccu.vx v10, a1, v8
-; RV32ZVE32F-NEXT: beqz a3, .LBB53_7
+; RV32ZVE32F-NEXT: beqz a3, .LBB53_12
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v10
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
+; RV32ZVE32F-NEXT: .LBB53_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB53_8
-; RV32ZVE32F-NEXT: .LBB53_2:
+; RV32ZVE32F-NEXT: bnez a4, .LBB53_13
+; RV32ZVE32F-NEXT: # %bb.3:
; RV32ZVE32F-NEXT: lw a4, 8(a2)
; RV32ZVE32F-NEXT: lw a5, 12(a2)
+; RV32ZVE32F-NEXT: .LBB53_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB53_9
-; RV32ZVE32F-NEXT: .LBB53_3:
+; RV32ZVE32F-NEXT: bnez a6, .LBB53_14
+; RV32ZVE32F-NEXT: # %bb.5:
; RV32ZVE32F-NEXT: lw a6, 16(a2)
; RV32ZVE32F-NEXT: lw a7, 20(a2)
+; RV32ZVE32F-NEXT: .LBB53_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB53_10
-; RV32ZVE32F-NEXT: .LBB53_4:
+; RV32ZVE32F-NEXT: bnez t1, .LBB53_15
+; RV32ZVE32F-NEXT: # %bb.7:
; RV32ZVE32F-NEXT: lw t1, 24(a2)
; RV32ZVE32F-NEXT: lw t2, 28(a2)
+; RV32ZVE32F-NEXT: .LBB53_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB53_11
-; RV32ZVE32F-NEXT: .LBB53_5:
+; RV32ZVE32F-NEXT: bnez t3, .LBB53_16
+; RV32ZVE32F-NEXT: # %bb.9:
; RV32ZVE32F-NEXT: lw t3, 32(a2)
; RV32ZVE32F-NEXT: lw t4, 36(a2)
+; RV32ZVE32F-NEXT: .LBB53_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB53_12
-; RV32ZVE32F-NEXT: .LBB53_6:
+; RV32ZVE32F-NEXT: bnez t5, .LBB53_17
+; RV32ZVE32F-NEXT: # %bb.11:
; RV32ZVE32F-NEXT: lw t5, 40(a2)
; RV32ZVE32F-NEXT: lw t6, 44(a2)
-; RV32ZVE32F-NEXT: j .LBB53_13
-; RV32ZVE32F-NEXT: .LBB53_7:
+; RV32ZVE32F-NEXT: j .LBB53_18
+; RV32ZVE32F-NEXT: .LBB53_12:
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
-; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: beqz a4, .LBB53_2
-; RV32ZVE32F-NEXT: .LBB53_8: # %cond.load1
+; RV32ZVE32F-NEXT: j .LBB53_2
+; RV32ZVE32F-NEXT: .LBB53_13: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v8
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: beqz a6, .LBB53_3
-; RV32ZVE32F-NEXT: .LBB53_9: # %cond.load4
+; RV32ZVE32F-NEXT: j .LBB53_4
+; RV32ZVE32F-NEXT: .LBB53_14: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v8
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: beqz t1, .LBB53_4
-; RV32ZVE32F-NEXT: .LBB53_10: # %cond.load7
+; RV32ZVE32F-NEXT: j .LBB53_6
+; RV32ZVE32F-NEXT: .LBB53_15: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v8
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: beqz t3, .LBB53_5
-; RV32ZVE32F-NEXT: .LBB53_11: # %cond.load10
+; RV32ZVE32F-NEXT: j .LBB53_8
+; RV32ZVE32F-NEXT: .LBB53_16: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v8
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: beqz t5, .LBB53_6
-; RV32ZVE32F-NEXT: .LBB53_12: # %cond.load13
+; RV32ZVE32F-NEXT: j .LBB53_10
+; RV32ZVE32F-NEXT: .LBB53_17: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v8
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB53_13: # %else14
+; RV32ZVE32F-NEXT: .LBB53_18: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -5596,31 +5515,31 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB53_16
-; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB53_22
+; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v8
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
+; RV32ZVE32F-NEXT: .LBB53_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB53_17
-; RV32ZVE32F-NEXT: .LBB53_15:
+; RV32ZVE32F-NEXT: bnez t0, .LBB53_23
+; RV32ZVE32F-NEXT: # %bb.21:
; RV32ZVE32F-NEXT: lw t0, 56(a2)
; RV32ZVE32F-NEXT: lw a2, 60(a2)
-; RV32ZVE32F-NEXT: j .LBB53_18
-; RV32ZVE32F-NEXT: .LBB53_16:
+; RV32ZVE32F-NEXT: j .LBB53_24
+; RV32ZVE32F-NEXT: .LBB53_22:
; RV32ZVE32F-NEXT: lw s0, 48(a2)
; RV32ZVE32F-NEXT: lw s1, 52(a2)
-; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: beqz t0, .LBB53_15
-; RV32ZVE32F-NEXT: .LBB53_17: # %cond.load19
+; RV32ZVE32F-NEXT: j .LBB53_20
+; RV32ZVE32F-NEXT: .LBB53_23: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: .LBB53_18: # %else20
+; RV32ZVE32F-NEXT: .LBB53_24: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -5650,7 +5569,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB53_3
+; RV64ZVE32F-NEXT: beqz a3, .LBB53_4
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
@@ -5658,16 +5577,16 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: srli a3, a3, 45
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
+; RV64ZVE32F-NEXT: .LBB53_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB53_4
-; RV64ZVE32F-NEXT: .LBB53_2:
+; RV64ZVE32F-NEXT: bnez a4, .LBB53_5
+; RV64ZVE32F-NEXT: # %bb.3:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: j .LBB53_5
-; RV64ZVE32F-NEXT: .LBB53_3:
+; RV64ZVE32F-NEXT: j .LBB53_6
+; RV64ZVE32F-NEXT: .LBB53_4:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: beqz a4, .LBB53_2
-; RV64ZVE32F-NEXT: .LBB53_4: # %cond.load1
+; RV64ZVE32F-NEXT: j .LBB53_2
+; RV64ZVE32F-NEXT: .LBB53_5: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
@@ -5675,87 +5594,87 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: srli a4, a4, 45
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: .LBB53_5: # %else2
+; RV64ZVE32F-NEXT: .LBB53_6: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB53_10
-; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a6, .LBB53_14
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 48
; RV64ZVE32F-NEXT: srli a6, a6, 45
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
+; RV64ZVE32F-NEXT: .LBB53_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB53_11
-; RV64ZVE32F-NEXT: .LBB53_7:
+; RV64ZVE32F-NEXT: bnez a7, .LBB53_15
+; RV64ZVE32F-NEXT: # %bb.9:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: .LBB53_10:
; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB53_12
-; RV64ZVE32F-NEXT: .LBB53_8:
+; RV64ZVE32F-NEXT: bnez t0, .LBB53_16
+; RV64ZVE32F-NEXT: # %bb.11:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
+; RV64ZVE32F-NEXT: .LBB53_12:
; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB53_13
-; RV64ZVE32F-NEXT: .LBB53_9:
+; RV64ZVE32F-NEXT: bnez t1, .LBB53_17
+; RV64ZVE32F-NEXT: # %bb.13:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB53_14
-; RV64ZVE32F-NEXT: .LBB53_10:
+; RV64ZVE32F-NEXT: j .LBB53_18
+; RV64ZVE32F-NEXT: .LBB53_14:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: beqz a7, .LBB53_7
-; RV64ZVE32F-NEXT: .LBB53_11: # %cond.load7
+; RV64ZVE32F-NEXT: j .LBB53_8
+; RV64ZVE32F-NEXT: .LBB53_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 48
; RV64ZVE32F-NEXT: srli a7, a7, 45
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: beqz t0, .LBB53_8
-; RV64ZVE32F-NEXT: .LBB53_12: # %cond.load10
+; RV64ZVE32F-NEXT: j .LBB53_10
+; RV64ZVE32F-NEXT: .LBB53_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v9
; RV64ZVE32F-NEXT: slli t0, t0, 48
; RV64ZVE32F-NEXT: srli t0, t0, 45
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: beqz t1, .LBB53_9
-; RV64ZVE32F-NEXT: .LBB53_13: # %cond.load13
+; RV64ZVE32F-NEXT: j .LBB53_12
+; RV64ZVE32F-NEXT: .LBB53_17: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 48
; RV64ZVE32F-NEXT: srli t1, t1, 45
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: .LBB53_14: # %else14
+; RV64ZVE32F-NEXT: .LBB53_18: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: beqz t2, .LBB53_17
-; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
+; RV64ZVE32F-NEXT: beqz t2, .LBB53_22
+; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 48
; RV64ZVE32F-NEXT: srli t2, t2, 45
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
+; RV64ZVE32F-NEXT: .LBB53_20: # %cond.load16
; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB53_18
-; RV64ZVE32F-NEXT: .LBB53_16:
+; RV64ZVE32F-NEXT: bnez a5, .LBB53_23
+; RV64ZVE32F-NEXT: # %bb.21:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB53_19
-; RV64ZVE32F-NEXT: .LBB53_17:
+; RV64ZVE32F-NEXT: j .LBB53_24
+; RV64ZVE32F-NEXT: .LBB53_22:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
-; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: beqz a5, .LBB53_16
-; RV64ZVE32F-NEXT: .LBB53_18: # %cond.load19
+; RV64ZVE32F-NEXT: j .LBB53_20
+; RV64ZVE32F-NEXT: .LBB53_23: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 45
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB53_19: # %else20
+; RV64ZVE32F-NEXT: .LBB53_24: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -5800,81 +5719,81 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i
; RV32ZVE32F-NEXT: andi a3, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: beqz a3, .LBB54_7
+; RV32ZVE32F-NEXT: beqz a3, .LBB54_12
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
+; RV32ZVE32F-NEXT: .LBB54_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB54_8
-; RV32ZVE32F-NEXT: .LBB54_2:
+; RV32ZVE32F-NEXT: bnez a4, .LBB54_13
+; RV32ZVE32F-NEXT: # %bb.3:
; RV32ZVE32F-NEXT: lw a4, 8(a2)
; RV32ZVE32F-NEXT: lw a5, 12(a2)
+; RV32ZVE32F-NEXT: .LBB54_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB54_9
-; RV32ZVE32F-NEXT: .LBB54_3:
+; RV32ZVE32F-NEXT: bnez a6, .LBB54_14
+; RV32ZVE32F-NEXT: # %bb.5:
; RV32ZVE32F-NEXT: lw a6, 16(a2)
; RV32ZVE32F-NEXT: lw a7, 20(a2)
+; RV32ZVE32F-NEXT: .LBB54_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB54_10
-; RV32ZVE32F-NEXT: .LBB54_4:
+; RV32ZVE32F-NEXT: bnez t1, .LBB54_15
+; RV32ZVE32F-NEXT: # %bb.7:
; RV32ZVE32F-NEXT: lw t1, 24(a2)
; RV32ZVE32F-NEXT: lw t2, 28(a2)
+; RV32ZVE32F-NEXT: .LBB54_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB54_11
-; RV32ZVE32F-NEXT: .LBB54_5:
+; RV32ZVE32F-NEXT: bnez t3, .LBB54_16
+; RV32ZVE32F-NEXT: # %bb.9:
; RV32ZVE32F-NEXT: lw t3, 32(a2)
; RV32ZVE32F-NEXT: lw t4, 36(a2)
+; RV32ZVE32F-NEXT: .LBB54_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB54_12
-; RV32ZVE32F-NEXT: .LBB54_6:
+; RV32ZVE32F-NEXT: bnez t5, .LBB54_17
+; RV32ZVE32F-NEXT: # %bb.11:
; RV32ZVE32F-NEXT: lw t5, 40(a2)
; RV32ZVE32F-NEXT: lw t6, 44(a2)
-; RV32ZVE32F-NEXT: j .LBB54_13
-; RV32ZVE32F-NEXT: .LBB54_7:
+; RV32ZVE32F-NEXT: j .LBB54_18
+; RV32ZVE32F-NEXT: .LBB54_12:
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
-; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: beqz a4, .LBB54_2
-; RV32ZVE32F-NEXT: .LBB54_8: # %cond.load1
+; RV32ZVE32F-NEXT: j .LBB54_2
+; RV32ZVE32F-NEXT: .LBB54_13: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: beqz a6, .LBB54_3
-; RV32ZVE32F-NEXT: .LBB54_9: # %cond.load4
+; RV32ZVE32F-NEXT: j .LBB54_4
+; RV32ZVE32F-NEXT: .LBB54_14: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: beqz t1, .LBB54_4
-; RV32ZVE32F-NEXT: .LBB54_10: # %cond.load7
+; RV32ZVE32F-NEXT: j .LBB54_6
+; RV32ZVE32F-NEXT: .LBB54_15: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: beqz t3, .LBB54_5
-; RV32ZVE32F-NEXT: .LBB54_11: # %cond.load10
+; RV32ZVE32F-NEXT: j .LBB54_8
+; RV32ZVE32F-NEXT: .LBB54_16: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: beqz t5, .LBB54_6
-; RV32ZVE32F-NEXT: .LBB54_12: # %cond.load13
+; RV32ZVE32F-NEXT: j .LBB54_10
+; RV32ZVE32F-NEXT: .LBB54_17: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB54_13: # %else14
+; RV32ZVE32F-NEXT: .LBB54_18: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -5882,31 +5801,31 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB54_16
-; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB54_22
+; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
+; RV32ZVE32F-NEXT: .LBB54_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB54_17
-; RV32ZVE32F-NEXT: .LBB54_15:
+; RV32ZVE32F-NEXT: bnez t0, .LBB54_23
+; RV32ZVE32F-NEXT: # %bb.21:
; RV32ZVE32F-NEXT: lw t0, 56(a2)
; RV32ZVE32F-NEXT: lw a2, 60(a2)
-; RV32ZVE32F-NEXT: j .LBB54_18
-; RV32ZVE32F-NEXT: .LBB54_16:
+; RV32ZVE32F-NEXT: j .LBB54_24
+; RV32ZVE32F-NEXT: .LBB54_22:
; RV32ZVE32F-NEXT: lw s0, 48(a2)
; RV32ZVE32F-NEXT: lw s1, 52(a2)
-; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: beqz t0, .LBB54_15
-; RV32ZVE32F-NEXT: .LBB54_17: # %cond.load19
+; RV32ZVE32F-NEXT: j .LBB54_20
+; RV32ZVE32F-NEXT: .LBB54_23: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: .LBB54_18: # %else20
+; RV32ZVE32F-NEXT: .LBB54_24: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -5936,104 +5855,104 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB54_3
+; RV64ZVE32F-NEXT: beqz a3, .LBB54_4
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
+; RV64ZVE32F-NEXT: .LBB54_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB54_4
-; RV64ZVE32F-NEXT: .LBB54_2:
+; RV64ZVE32F-NEXT: bnez a4, .LBB54_5
+; RV64ZVE32F-NEXT: # %bb.3:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: j .LBB54_5
-; RV64ZVE32F-NEXT: .LBB54_3:
+; RV64ZVE32F-NEXT: j .LBB54_6
+; RV64ZVE32F-NEXT: .LBB54_4:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: beqz a4, .LBB54_2
-; RV64ZVE32F-NEXT: .LBB54_4: # %cond.load1
+; RV64ZVE32F-NEXT: j .LBB54_2
+; RV64ZVE32F-NEXT: .LBB54_5: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v10
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: .LBB54_5: # %else2
+; RV64ZVE32F-NEXT: .LBB54_6: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB54_10
-; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a6, .LBB54_14
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
+; RV64ZVE32F-NEXT: .LBB54_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB54_11
-; RV64ZVE32F-NEXT: .LBB54_7:
+; RV64ZVE32F-NEXT: bnez a7, .LBB54_15
+; RV64ZVE32F-NEXT: # %bb.9:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: .LBB54_10:
; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB54_12
-; RV64ZVE32F-NEXT: .LBB54_8:
+; RV64ZVE32F-NEXT: bnez t0, .LBB54_16
+; RV64ZVE32F-NEXT: # %bb.11:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
+; RV64ZVE32F-NEXT: .LBB54_12:
; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB54_13
-; RV64ZVE32F-NEXT: .LBB54_9:
+; RV64ZVE32F-NEXT: bnez t1, .LBB54_17
+; RV64ZVE32F-NEXT: # %bb.13:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB54_14
-; RV64ZVE32F-NEXT: .LBB54_10:
+; RV64ZVE32F-NEXT: j .LBB54_18
+; RV64ZVE32F-NEXT: .LBB54_14:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: beqz a7, .LBB54_7
-; RV64ZVE32F-NEXT: .LBB54_11: # %cond.load7
+; RV64ZVE32F-NEXT: j .LBB54_8
+; RV64ZVE32F-NEXT: .LBB54_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: beqz t0, .LBB54_8
-; RV64ZVE32F-NEXT: .LBB54_12: # %cond.load10
+; RV64ZVE32F-NEXT: j .LBB54_10
+; RV64ZVE32F-NEXT: .LBB54_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v10
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: beqz t1, .LBB54_9
-; RV64ZVE32F-NEXT: .LBB54_13: # %cond.load13
+; RV64ZVE32F-NEXT: j .LBB54_12
+; RV64ZVE32F-NEXT: .LBB54_17: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: .LBB54_14: # %else14
+; RV64ZVE32F-NEXT: .LBB54_18: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
-; RV64ZVE32F-NEXT: beqz t2, .LBB54_17
-; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
+; RV64ZVE32F-NEXT: beqz t2, .LBB54_22
+; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
+; RV64ZVE32F-NEXT: .LBB54_20: # %cond.load16
; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB54_18
-; RV64ZVE32F-NEXT: .LBB54_16:
+; RV64ZVE32F-NEXT: bnez a5, .LBB54_23
+; RV64ZVE32F-NEXT: # %bb.21:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB54_19
-; RV64ZVE32F-NEXT: .LBB54_17:
+; RV64ZVE32F-NEXT: j .LBB54_24
+; RV64ZVE32F-NEXT: .LBB54_22:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
-; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: beqz a5, .LBB54_16
-; RV64ZVE32F-NEXT: .LBB54_18: # %cond.load19
+; RV64ZVE32F-NEXT: j .LBB54_20
+; RV64ZVE32F-NEXT: .LBB54_23: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB54_19: # %else20
+; RV64ZVE32F-NEXT: .LBB54_24: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -6077,81 +5996,81 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV32ZVE32F-NEXT: andi a3, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: beqz a3, .LBB55_7
+; RV32ZVE32F-NEXT: beqz a3, .LBB55_12
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
+; RV32ZVE32F-NEXT: .LBB55_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB55_8
-; RV32ZVE32F-NEXT: .LBB55_2:
+; RV32ZVE32F-NEXT: bnez a4, .LBB55_13
+; RV32ZVE32F-NEXT: # %bb.3:
; RV32ZVE32F-NEXT: lw a4, 8(a2)
; RV32ZVE32F-NEXT: lw a5, 12(a2)
+; RV32ZVE32F-NEXT: .LBB55_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB55_9
-; RV32ZVE32F-NEXT: .LBB55_3:
+; RV32ZVE32F-NEXT: bnez a6, .LBB55_14
+; RV32ZVE32F-NEXT: # %bb.5:
; RV32ZVE32F-NEXT: lw a6, 16(a2)
; RV32ZVE32F-NEXT: lw a7, 20(a2)
+; RV32ZVE32F-NEXT: .LBB55_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB55_10
-; RV32ZVE32F-NEXT: .LBB55_4:
+; RV32ZVE32F-NEXT: bnez t1, .LBB55_15
+; RV32ZVE32F-NEXT: # %bb.7:
; RV32ZVE32F-NEXT: lw t1, 24(a2)
; RV32ZVE32F-NEXT: lw t2, 28(a2)
+; RV32ZVE32F-NEXT: .LBB55_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB55_11
-; RV32ZVE32F-NEXT: .LBB55_5:
+; RV32ZVE32F-NEXT: bnez t3, .LBB55_16
+; RV32ZVE32F-NEXT: # %bb.9:
; RV32ZVE32F-NEXT: lw t3, 32(a2)
; RV32ZVE32F-NEXT: lw t4, 36(a2)
+; RV32ZVE32F-NEXT: .LBB55_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB55_12
-; RV32ZVE32F-NEXT: .LBB55_6:
+; RV32ZVE32F-NEXT: bnez t5, .LBB55_17
+; RV32ZVE32F-NEXT: # %bb.11:
; RV32ZVE32F-NEXT: lw t5, 40(a2)
; RV32ZVE32F-NEXT: lw t6, 44(a2)
-; RV32ZVE32F-NEXT: j .LBB55_13
-; RV32ZVE32F-NEXT: .LBB55_7:
+; RV32ZVE32F-NEXT: j .LBB55_18
+; RV32ZVE32F-NEXT: .LBB55_12:
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
-; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: beqz a4, .LBB55_2
-; RV32ZVE32F-NEXT: .LBB55_8: # %cond.load1
+; RV32ZVE32F-NEXT: j .LBB55_2
+; RV32ZVE32F-NEXT: .LBB55_13: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: beqz a6, .LBB55_3
-; RV32ZVE32F-NEXT: .LBB55_9: # %cond.load4
+; RV32ZVE32F-NEXT: j .LBB55_4
+; RV32ZVE32F-NEXT: .LBB55_14: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: beqz t1, .LBB55_4
-; RV32ZVE32F-NEXT: .LBB55_10: # %cond.load7
+; RV32ZVE32F-NEXT: j .LBB55_6
+; RV32ZVE32F-NEXT: .LBB55_15: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: beqz t3, .LBB55_5
-; RV32ZVE32F-NEXT: .LBB55_11: # %cond.load10
+; RV32ZVE32F-NEXT: j .LBB55_8
+; RV32ZVE32F-NEXT: .LBB55_16: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: beqz t5, .LBB55_6
-; RV32ZVE32F-NEXT: .LBB55_12: # %cond.load13
+; RV32ZVE32F-NEXT: j .LBB55_10
+; RV32ZVE32F-NEXT: .LBB55_17: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB55_13: # %else14
+; RV32ZVE32F-NEXT: .LBB55_18: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -6159,31 +6078,31 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB55_16
-; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB55_22
+; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
+; RV32ZVE32F-NEXT: .LBB55_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB55_17
-; RV32ZVE32F-NEXT: .LBB55_15:
+; RV32ZVE32F-NEXT: bnez t0, .LBB55_23
+; RV32ZVE32F-NEXT: # %bb.21:
; RV32ZVE32F-NEXT: lw t0, 56(a2)
; RV32ZVE32F-NEXT: lw a2, 60(a2)
-; RV32ZVE32F-NEXT: j .LBB55_18
-; RV32ZVE32F-NEXT: .LBB55_16:
+; RV32ZVE32F-NEXT: j .LBB55_24
+; RV32ZVE32F-NEXT: .LBB55_22:
; RV32ZVE32F-NEXT: lw s0, 48(a2)
; RV32ZVE32F-NEXT: lw s1, 52(a2)
-; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: beqz t0, .LBB55_15
-; RV32ZVE32F-NEXT: .LBB55_17: # %cond.load19
+; RV32ZVE32F-NEXT: j .LBB55_20
+; RV32ZVE32F-NEXT: .LBB55_23: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: .LBB55_18: # %else20
+; RV32ZVE32F-NEXT: .LBB55_24: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -6213,104 +6132,104 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB55_3
+; RV64ZVE32F-NEXT: beqz a3, .LBB55_4
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
+; RV64ZVE32F-NEXT: .LBB55_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB55_4
-; RV64ZVE32F-NEXT: .LBB55_2:
+; RV64ZVE32F-NEXT: bnez a4, .LBB55_5
+; RV64ZVE32F-NEXT: # %bb.3:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: j .LBB55_5
-; RV64ZVE32F-NEXT: .LBB55_3:
+; RV64ZVE32F-NEXT: j .LBB55_6
+; RV64ZVE32F-NEXT: .LBB55_4:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: beqz a4, .LBB55_2
-; RV64ZVE32F-NEXT: .LBB55_4: # %cond.load1
+; RV64ZVE32F-NEXT: j .LBB55_2
+; RV64ZVE32F-NEXT: .LBB55_5: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v10
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: .LBB55_5: # %else2
+; RV64ZVE32F-NEXT: .LBB55_6: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB55_10
-; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a6, .LBB55_14
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
+; RV64ZVE32F-NEXT: .LBB55_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB55_11
-; RV64ZVE32F-NEXT: .LBB55_7:
+; RV64ZVE32F-NEXT: bnez a7, .LBB55_15
+; RV64ZVE32F-NEXT: # %bb.9:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: .LBB55_10:
; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB55_12
-; RV64ZVE32F-NEXT: .LBB55_8:
+; RV64ZVE32F-NEXT: bnez t0, .LBB55_16
+; RV64ZVE32F-NEXT: # %bb.11:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
+; RV64ZVE32F-NEXT: .LBB55_12:
; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB55_13
-; RV64ZVE32F-NEXT: .LBB55_9:
+; RV64ZVE32F-NEXT: bnez t1, .LBB55_17
+; RV64ZVE32F-NEXT: # %bb.13:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB55_14
-; RV64ZVE32F-NEXT: .LBB55_10:
+; RV64ZVE32F-NEXT: j .LBB55_18
+; RV64ZVE32F-NEXT: .LBB55_14:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: beqz a7, .LBB55_7
-; RV64ZVE32F-NEXT: .LBB55_11: # %cond.load7
+; RV64ZVE32F-NEXT: j .LBB55_8
+; RV64ZVE32F-NEXT: .LBB55_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: beqz t0, .LBB55_8
-; RV64ZVE32F-NEXT: .LBB55_12: # %cond.load10
+; RV64ZVE32F-NEXT: j .LBB55_10
+; RV64ZVE32F-NEXT: .LBB55_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v10
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: beqz t1, .LBB55_9
-; RV64ZVE32F-NEXT: .LBB55_13: # %cond.load13
+; RV64ZVE32F-NEXT: j .LBB55_12
+; RV64ZVE32F-NEXT: .LBB55_17: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: .LBB55_14: # %else14
+; RV64ZVE32F-NEXT: .LBB55_18: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
-; RV64ZVE32F-NEXT: beqz t2, .LBB55_17
-; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
+; RV64ZVE32F-NEXT: beqz t2, .LBB55_22
+; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
+; RV64ZVE32F-NEXT: .LBB55_20: # %cond.load16
; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB55_18
-; RV64ZVE32F-NEXT: .LBB55_16:
+; RV64ZVE32F-NEXT: bnez a5, .LBB55_23
+; RV64ZVE32F-NEXT: # %bb.21:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB55_19
-; RV64ZVE32F-NEXT: .LBB55_17:
+; RV64ZVE32F-NEXT: j .LBB55_24
+; RV64ZVE32F-NEXT: .LBB55_22:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
-; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: beqz a5, .LBB55_16
-; RV64ZVE32F-NEXT: .LBB55_18: # %cond.load19
+; RV64ZVE32F-NEXT: j .LBB55_20
+; RV64ZVE32F-NEXT: .LBB55_23: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB55_19: # %else20
+; RV64ZVE32F-NEXT: .LBB55_24: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -6355,81 +6274,81 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV32ZVE32F-NEXT: andi a3, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: beqz a3, .LBB56_7
+; RV32ZVE32F-NEXT: beqz a3, .LBB56_12
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
+; RV32ZVE32F-NEXT: .LBB56_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB56_8
-; RV32ZVE32F-NEXT: .LBB56_2:
+; RV32ZVE32F-NEXT: bnez a4, .LBB56_13
+; RV32ZVE32F-NEXT: # %bb.3:
; RV32ZVE32F-NEXT: lw a4, 8(a2)
; RV32ZVE32F-NEXT: lw a5, 12(a2)
+; RV32ZVE32F-NEXT: .LBB56_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB56_9
-; RV32ZVE32F-NEXT: .LBB56_3:
+; RV32ZVE32F-NEXT: bnez a6, .LBB56_14
+; RV32ZVE32F-NEXT: # %bb.5:
; RV32ZVE32F-NEXT: lw a6, 16(a2)
; RV32ZVE32F-NEXT: lw a7, 20(a2)
+; RV32ZVE32F-NEXT: .LBB56_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB56_10
-; RV32ZVE32F-NEXT: .LBB56_4:
+; RV32ZVE32F-NEXT: bnez t1, .LBB56_15
+; RV32ZVE32F-NEXT: # %bb.7:
; RV32ZVE32F-NEXT: lw t1, 24(a2)
; RV32ZVE32F-NEXT: lw t2, 28(a2)
+; RV32ZVE32F-NEXT: .LBB56_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB56_11
-; RV32ZVE32F-NEXT: .LBB56_5:
+; RV32ZVE32F-NEXT: bnez t3, .LBB56_16
+; RV32ZVE32F-NEXT: # %bb.9:
; RV32ZVE32F-NEXT: lw t3, 32(a2)
; RV32ZVE32F-NEXT: lw t4, 36(a2)
+; RV32ZVE32F-NEXT: .LBB56_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB56_12
-; RV32ZVE32F-NEXT: .LBB56_6:
+; RV32ZVE32F-NEXT: bnez t5, .LBB56_17
+; RV32ZVE32F-NEXT: # %bb.11:
; RV32ZVE32F-NEXT: lw t5, 40(a2)
; RV32ZVE32F-NEXT: lw t6, 44(a2)
-; RV32ZVE32F-NEXT: j .LBB56_13
-; RV32ZVE32F-NEXT: .LBB56_7:
+; RV32ZVE32F-NEXT: j .LBB56_18
+; RV32ZVE32F-NEXT: .LBB56_12:
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
-; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: beqz a4, .LBB56_2
-; RV32ZVE32F-NEXT: .LBB56_8: # %cond.load1
+; RV32ZVE32F-NEXT: j .LBB56_2
+; RV32ZVE32F-NEXT: .LBB56_13: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: beqz a6, .LBB56_3
-; RV32ZVE32F-NEXT: .LBB56_9: # %cond.load4
+; RV32ZVE32F-NEXT: j .LBB56_4
+; RV32ZVE32F-NEXT: .LBB56_14: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: beqz t1, .LBB56_4
-; RV32ZVE32F-NEXT: .LBB56_10: # %cond.load7
+; RV32ZVE32F-NEXT: j .LBB56_6
+; RV32ZVE32F-NEXT: .LBB56_15: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: beqz t3, .LBB56_5
-; RV32ZVE32F-NEXT: .LBB56_11: # %cond.load10
+; RV32ZVE32F-NEXT: j .LBB56_8
+; RV32ZVE32F-NEXT: .LBB56_16: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: beqz t5, .LBB56_6
-; RV32ZVE32F-NEXT: .LBB56_12: # %cond.load13
+; RV32ZVE32F-NEXT: j .LBB56_10
+; RV32ZVE32F-NEXT: .LBB56_17: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB56_13: # %else14
+; RV32ZVE32F-NEXT: .LBB56_18: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -6437,31 +6356,31 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB56_16
-; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB56_22
+; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
+; RV32ZVE32F-NEXT: .LBB56_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB56_17
-; RV32ZVE32F-NEXT: .LBB56_15:
+; RV32ZVE32F-NEXT: bnez t0, .LBB56_23
+; RV32ZVE32F-NEXT: # %bb.21:
; RV32ZVE32F-NEXT: lw t0, 56(a2)
; RV32ZVE32F-NEXT: lw a2, 60(a2)
-; RV32ZVE32F-NEXT: j .LBB56_18
-; RV32ZVE32F-NEXT: .LBB56_16:
+; RV32ZVE32F-NEXT: j .LBB56_24
+; RV32ZVE32F-NEXT: .LBB56_22:
; RV32ZVE32F-NEXT: lw s0, 48(a2)
; RV32ZVE32F-NEXT: lw s1, 52(a2)
-; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: beqz t0, .LBB56_15
-; RV32ZVE32F-NEXT: .LBB56_17: # %cond.load19
+; RV32ZVE32F-NEXT: j .LBB56_20
+; RV32ZVE32F-NEXT: .LBB56_23: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: .LBB56_18: # %else20
+; RV32ZVE32F-NEXT: .LBB56_24: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -6491,7 +6410,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB56_3
+; RV64ZVE32F-NEXT: beqz a3, .LBB56_4
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
@@ -6499,16 +6418,16 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
+; RV64ZVE32F-NEXT: .LBB56_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB56_4
-; RV64ZVE32F-NEXT: .LBB56_2:
+; RV64ZVE32F-NEXT: bnez a4, .LBB56_5
+; RV64ZVE32F-NEXT: # %bb.3:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: j .LBB56_5
-; RV64ZVE32F-NEXT: .LBB56_3:
+; RV64ZVE32F-NEXT: j .LBB56_6
+; RV64ZVE32F-NEXT: .LBB56_4:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: beqz a4, .LBB56_2
-; RV64ZVE32F-NEXT: .LBB56_4: # %cond.load1
+; RV64ZVE32F-NEXT: j .LBB56_2
+; RV64ZVE32F-NEXT: .LBB56_5: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v10
@@ -6516,87 +6435,87 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV64ZVE32F-NEXT: srli a4, a4, 29
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: .LBB56_5: # %else2
+; RV64ZVE32F-NEXT: .LBB56_6: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB56_10
-; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a6, .LBB56_14
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 32
; RV64ZVE32F-NEXT: srli a6, a6, 29
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
+; RV64ZVE32F-NEXT: .LBB56_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB56_11
-; RV64ZVE32F-NEXT: .LBB56_7:
+; RV64ZVE32F-NEXT: bnez a7, .LBB56_15
+; RV64ZVE32F-NEXT: # %bb.9:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: .LBB56_10:
; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB56_12
-; RV64ZVE32F-NEXT: .LBB56_8:
+; RV64ZVE32F-NEXT: bnez t0, .LBB56_16
+; RV64ZVE32F-NEXT: # %bb.11:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
+; RV64ZVE32F-NEXT: .LBB56_12:
; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB56_13
-; RV64ZVE32F-NEXT: .LBB56_9:
+; RV64ZVE32F-NEXT: bnez t1, .LBB56_17
+; RV64ZVE32F-NEXT: # %bb.13:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB56_14
-; RV64ZVE32F-NEXT: .LBB56_10:
+; RV64ZVE32F-NEXT: j .LBB56_18
+; RV64ZVE32F-NEXT: .LBB56_14:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: beqz a7, .LBB56_7
-; RV64ZVE32F-NEXT: .LBB56_11: # %cond.load7
+; RV64ZVE32F-NEXT: j .LBB56_8
+; RV64ZVE32F-NEXT: .LBB56_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 32
; RV64ZVE32F-NEXT: srli a7, a7, 29
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: beqz t0, .LBB56_8
-; RV64ZVE32F-NEXT: .LBB56_12: # %cond.load10
+; RV64ZVE32F-NEXT: j .LBB56_10
+; RV64ZVE32F-NEXT: .LBB56_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v10
; RV64ZVE32F-NEXT: slli t0, t0, 32
; RV64ZVE32F-NEXT: srli t0, t0, 29
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: beqz t1, .LBB56_9
-; RV64ZVE32F-NEXT: .LBB56_13: # %cond.load13
+; RV64ZVE32F-NEXT: j .LBB56_12
+; RV64ZVE32F-NEXT: .LBB56_17: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 32
; RV64ZVE32F-NEXT: srli t1, t1, 29
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: .LBB56_14: # %else14
+; RV64ZVE32F-NEXT: .LBB56_18: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
-; RV64ZVE32F-NEXT: beqz t2, .LBB56_17
-; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
+; RV64ZVE32F-NEXT: beqz t2, .LBB56_22
+; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 32
; RV64ZVE32F-NEXT: srli t2, t2, 29
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
+; RV64ZVE32F-NEXT: .LBB56_20: # %cond.load16
; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB56_18
-; RV64ZVE32F-NEXT: .LBB56_16:
+; RV64ZVE32F-NEXT: bnez a5, .LBB56_23
+; RV64ZVE32F-NEXT: # %bb.21:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB56_19
-; RV64ZVE32F-NEXT: .LBB56_17:
+; RV64ZVE32F-NEXT: j .LBB56_24
+; RV64ZVE32F-NEXT: .LBB56_22:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
-; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: beqz a5, .LBB56_16
-; RV64ZVE32F-NEXT: .LBB56_18: # %cond.load19
+; RV64ZVE32F-NEXT: j .LBB56_20
+; RV64ZVE32F-NEXT: .LBB56_23: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB56_19: # %else20
+; RV64ZVE32F-NEXT: .LBB56_24: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -6656,81 +6575,81 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m,
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: andi a2, t0, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: beqz a2, .LBB57_7
+; RV32ZVE32F-NEXT: beqz a2, .LBB57_12
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
+; RV32ZVE32F-NEXT: .LBB57_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB57_8
-; RV32ZVE32F-NEXT: .LBB57_2:
+; RV32ZVE32F-NEXT: bnez a4, .LBB57_13
+; RV32ZVE32F-NEXT: # %bb.3:
; RV32ZVE32F-NEXT: lw a4, 8(a3)
; RV32ZVE32F-NEXT: lw a5, 12(a3)
+; RV32ZVE32F-NEXT: .LBB57_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB57_9
-; RV32ZVE32F-NEXT: .LBB57_3:
+; RV32ZVE32F-NEXT: bnez a6, .LBB57_14
+; RV32ZVE32F-NEXT: # %bb.5:
; RV32ZVE32F-NEXT: lw a6, 16(a3)
; RV32ZVE32F-NEXT: lw a7, 20(a3)
+; RV32ZVE32F-NEXT: .LBB57_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB57_10
-; RV32ZVE32F-NEXT: .LBB57_4:
+; RV32ZVE32F-NEXT: bnez t1, .LBB57_15
+; RV32ZVE32F-NEXT: # %bb.7:
; RV32ZVE32F-NEXT: lw t1, 24(a3)
; RV32ZVE32F-NEXT: lw t2, 28(a3)
+; RV32ZVE32F-NEXT: .LBB57_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB57_11
-; RV32ZVE32F-NEXT: .LBB57_5:
+; RV32ZVE32F-NEXT: bnez t3, .LBB57_16
+; RV32ZVE32F-NEXT: # %bb.9:
; RV32ZVE32F-NEXT: lw t3, 32(a3)
; RV32ZVE32F-NEXT: lw t4, 36(a3)
+; RV32ZVE32F-NEXT: .LBB57_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB57_12
-; RV32ZVE32F-NEXT: .LBB57_6:
+; RV32ZVE32F-NEXT: bnez t5, .LBB57_17
+; RV32ZVE32F-NEXT: # %bb.11:
; RV32ZVE32F-NEXT: lw t5, 40(a3)
; RV32ZVE32F-NEXT: lw t6, 44(a3)
-; RV32ZVE32F-NEXT: j .LBB57_13
-; RV32ZVE32F-NEXT: .LBB57_7:
+; RV32ZVE32F-NEXT: j .LBB57_18
+; RV32ZVE32F-NEXT: .LBB57_12:
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a2, 4(a3)
-; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: beqz a4, .LBB57_2
-; RV32ZVE32F-NEXT: .LBB57_8: # %cond.load1
+; RV32ZVE32F-NEXT: j .LBB57_2
+; RV32ZVE32F-NEXT: .LBB57_13: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: beqz a6, .LBB57_3
-; RV32ZVE32F-NEXT: .LBB57_9: # %cond.load4
+; RV32ZVE32F-NEXT: j .LBB57_4
+; RV32ZVE32F-NEXT: .LBB57_14: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: beqz t1, .LBB57_4
-; RV32ZVE32F-NEXT: .LBB57_10: # %cond.load7
+; RV32ZVE32F-NEXT: j .LBB57_6
+; RV32ZVE32F-NEXT: .LBB57_15: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: beqz t3, .LBB57_5
-; RV32ZVE32F-NEXT: .LBB57_11: # %cond.load10
+; RV32ZVE32F-NEXT: j .LBB57_8
+; RV32ZVE32F-NEXT: .LBB57_16: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: beqz t5, .LBB57_6
-; RV32ZVE32F-NEXT: .LBB57_12: # %cond.load13
+; RV32ZVE32F-NEXT: j .LBB57_10
+; RV32ZVE32F-NEXT: .LBB57_17: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB57_13: # %else14
+; RV32ZVE32F-NEXT: .LBB57_18: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -6738,31 +6657,31 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m,
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB57_16
-; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB57_22
+; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
+; RV32ZVE32F-NEXT: .LBB57_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB57_17
-; RV32ZVE32F-NEXT: .LBB57_15:
+; RV32ZVE32F-NEXT: bnez t0, .LBB57_23
+; RV32ZVE32F-NEXT: # %bb.21:
; RV32ZVE32F-NEXT: lw t0, 56(a3)
; RV32ZVE32F-NEXT: lw a3, 60(a3)
-; RV32ZVE32F-NEXT: j .LBB57_18
-; RV32ZVE32F-NEXT: .LBB57_16:
+; RV32ZVE32F-NEXT: j .LBB57_24
+; RV32ZVE32F-NEXT: .LBB57_22:
; RV32ZVE32F-NEXT: lw s0, 48(a3)
; RV32ZVE32F-NEXT: lw s1, 52(a3)
-; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: beqz t0, .LBB57_15
-; RV32ZVE32F-NEXT: .LBB57_17: # %cond.load19
+; RV32ZVE32F-NEXT: j .LBB57_20
+; RV32ZVE32F-NEXT: .LBB57_23: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw t0, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
-; RV32ZVE32F-NEXT: .LBB57_18: # %else20
+; RV32ZVE32F-NEXT: .LBB57_24: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a2, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -6792,93 +6711,93 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a7, v0
; RV64ZVE32F-NEXT: andi a4, a7, 1
-; RV64ZVE32F-NEXT: beqz a4, .LBB57_9
+; RV64ZVE32F-NEXT: beqz a4, .LBB57_16
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: ld a4, 0(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
+; RV64ZVE32F-NEXT: .LBB57_2: # %cond.load
; RV64ZVE32F-NEXT: andi a5, a7, 2
-; RV64ZVE32F-NEXT: bnez a5, .LBB57_10
-; RV64ZVE32F-NEXT: .LBB57_2:
+; RV64ZVE32F-NEXT: bnez a5, .LBB57_17
+; RV64ZVE32F-NEXT: # %bb.3:
; RV64ZVE32F-NEXT: ld a5, 8(a3)
+; RV64ZVE32F-NEXT: .LBB57_4:
; RV64ZVE32F-NEXT: andi a6, a7, 4
-; RV64ZVE32F-NEXT: bnez a6, .LBB57_11
-; RV64ZVE32F-NEXT: .LBB57_3:
+; RV64ZVE32F-NEXT: bnez a6, .LBB57_18
+; RV64ZVE32F-NEXT: # %bb.5:
; RV64ZVE32F-NEXT: ld a6, 16(a3)
+; RV64ZVE32F-NEXT: .LBB57_6:
; RV64ZVE32F-NEXT: andi t0, a7, 8
-; RV64ZVE32F-NEXT: bnez t0, .LBB57_12
-; RV64ZVE32F-NEXT: .LBB57_4:
+; RV64ZVE32F-NEXT: bnez t0, .LBB57_19
+; RV64ZVE32F-NEXT: # %bb.7:
; RV64ZVE32F-NEXT: ld t0, 24(a3)
+; RV64ZVE32F-NEXT: .LBB57_8:
; RV64ZVE32F-NEXT: andi t1, a7, 16
-; RV64ZVE32F-NEXT: bnez t1, .LBB57_13
-; RV64ZVE32F-NEXT: .LBB57_5:
+; RV64ZVE32F-NEXT: bnez t1, .LBB57_20
+; RV64ZVE32F-NEXT: # %bb.9:
; RV64ZVE32F-NEXT: ld t1, 32(a3)
+; RV64ZVE32F-NEXT: .LBB57_10:
; RV64ZVE32F-NEXT: andi t2, a7, 32
-; RV64ZVE32F-NEXT: bnez t2, .LBB57_14
-; RV64ZVE32F-NEXT: .LBB57_6:
+; RV64ZVE32F-NEXT: bnez t2, .LBB57_21
+; RV64ZVE32F-NEXT: # %bb.11:
; RV64ZVE32F-NEXT: ld t2, 40(a3)
+; RV64ZVE32F-NEXT: .LBB57_12:
; RV64ZVE32F-NEXT: andi t3, a7, 64
-; RV64ZVE32F-NEXT: bnez t3, .LBB57_15
-; RV64ZVE32F-NEXT: .LBB57_7:
+; RV64ZVE32F-NEXT: bnez t3, .LBB57_22
+; RV64ZVE32F-NEXT: # %bb.13:
; RV64ZVE32F-NEXT: ld t3, 48(a3)
+; RV64ZVE32F-NEXT: .LBB57_14:
; RV64ZVE32F-NEXT: andi a7, a7, -128
-; RV64ZVE32F-NEXT: bnez a7, .LBB57_16
-; RV64ZVE32F-NEXT: .LBB57_8:
+; RV64ZVE32F-NEXT: bnez a7, .LBB57_23
+; RV64ZVE32F-NEXT: # %bb.15:
; RV64ZVE32F-NEXT: ld a1, 56(a3)
-; RV64ZVE32F-NEXT: j .LBB57_17
-; RV64ZVE32F-NEXT: .LBB57_9:
+; RV64ZVE32F-NEXT: j .LBB57_24
+; RV64ZVE32F-NEXT: .LBB57_16:
; RV64ZVE32F-NEXT: ld a4, 0(a3)
-; RV64ZVE32F-NEXT: andi a5, a7, 2
-; RV64ZVE32F-NEXT: beqz a5, .LBB57_2
-; RV64ZVE32F-NEXT: .LBB57_10: # %cond.load1
+; RV64ZVE32F-NEXT: j .LBB57_2
+; RV64ZVE32F-NEXT: .LBB57_17: # %cond.load1
; RV64ZVE32F-NEXT: ld a5, 8(a2)
; RV64ZVE32F-NEXT: slli a5, a5, 3
; RV64ZVE32F-NEXT: add a5, a1, a5
; RV64ZVE32F-NEXT: ld a5, 0(a5)
-; RV64ZVE32F-NEXT: andi a6, a7, 4
-; RV64ZVE32F-NEXT: beqz a6, .LBB57_3
-; RV64ZVE32F-NEXT: .LBB57_11: # %cond.load4
+; RV64ZVE32F-NEXT: j .LBB57_4
+; RV64ZVE32F-NEXT: .LBB57_18: # %cond.load4
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
-; RV64ZVE32F-NEXT: andi t0, a7, 8
-; RV64ZVE32F-NEXT: beqz t0, .LBB57_4
-; RV64ZVE32F-NEXT: .LBB57_12: # %cond.load7
+; RV64ZVE32F-NEXT: j .LBB57_6
+; RV64ZVE32F-NEXT: .LBB57_19: # %cond.load7
; RV64ZVE32F-NEXT: ld t0, 24(a2)
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: andi t1, a7, 16
-; RV64ZVE32F-NEXT: beqz t1, .LBB57_5
-; RV64ZVE32F-NEXT: .LBB57_13: # %cond.load10
+; RV64ZVE32F-NEXT: j .LBB57_8
+; RV64ZVE32F-NEXT: .LBB57_20: # %cond.load10
; RV64ZVE32F-NEXT: ld t1, 32(a2)
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: andi t2, a7, 32
-; RV64ZVE32F-NEXT: beqz t2, .LBB57_6
-; RV64ZVE32F-NEXT: .LBB57_14: # %cond.load13
+; RV64ZVE32F-NEXT: j .LBB57_10
+; RV64ZVE32F-NEXT: .LBB57_21: # %cond.load13
; RV64ZVE32F-NEXT: ld t2, 40(a2)
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
-; RV64ZVE32F-NEXT: andi t3, a7, 64
-; RV64ZVE32F-NEXT: beqz t3, .LBB57_7
-; RV64ZVE32F-NEXT: .LBB57_15: # %cond.load16
+; RV64ZVE32F-NEXT: j .LBB57_12
+; RV64ZVE32F-NEXT: .LBB57_22: # %cond.load16
; RV64ZVE32F-NEXT: ld t3, 48(a2)
; RV64ZVE32F-NEXT: slli t3, t3, 3
; RV64ZVE32F-NEXT: add t3, a1, t3
; RV64ZVE32F-NEXT: ld t3, 0(t3)
-; RV64ZVE32F-NEXT: andi a7, a7, -128
-; RV64ZVE32F-NEXT: beqz a7, .LBB57_8
-; RV64ZVE32F-NEXT: .LBB57_16: # %cond.load19
+; RV64ZVE32F-NEXT: j .LBB57_14
+; RV64ZVE32F-NEXT: .LBB57_23: # %cond.load19
; RV64ZVE32F-NEXT: ld a2, 56(a2)
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB57_17: # %else20
+; RV64ZVE32F-NEXT: .LBB57_24: # %else20
; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: sd a5, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -6961,17 +6880,16 @@ define <2 x bfloat> @mgather_v2bf16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x bfloat> %
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB59_3
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB59_1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB59_4
-; RV64ZVE32F-NEXT: .LBB59_2: # %else2
+; RV64ZVE32F-NEXT: # %bb.2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB59_3: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
-; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB59_2
+; RV64ZVE32F-NEXT: j .LBB59_1
; RV64ZVE32F-NEXT: .LBB59_4: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
@@ -7006,7 +6924,7 @@ define <4 x bfloat> @mgather_v4bf16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x bfloat> %
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB60_5
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB60_1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB60_6
; RV64ZVE32F-NEXT: .LBB60_2: # %else2
@@ -7015,15 +6933,14 @@ define <4 x bfloat> @mgather_v4bf16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x bfloat> %
; RV64ZVE32F-NEXT: .LBB60_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB60_8
-; RV64ZVE32F-NEXT: .LBB60_4: # %else8
+; RV64ZVE32F-NEXT: # %bb.4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB60_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB60_2
+; RV64ZVE32F-NEXT: j .LBB60_1
; RV64ZVE32F-NEXT: .LBB60_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
@@ -7031,16 +6948,14 @@ define <4 x bfloat> @mgather_v4bf16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x bfloat> %
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB60_3
+; RV64ZVE32F-NEXT: j .LBB60_2
; RV64ZVE32F-NEXT: .LBB60_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: andi a1, a1, 8
-; RV64ZVE32F-NEXT: beqz a1, .LBB60_4
+; RV64ZVE32F-NEXT: j .LBB60_3
; RV64ZVE32F-NEXT: .LBB60_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lh a0, 0(a0)
@@ -7130,7 +7045,7 @@ define <8 x bfloat> @mgather_v8bf16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x bfloat> %
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB63_9
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB63_1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB63_10
; RV64ZVE32F-NEXT: .LBB63_2: # %else2
@@ -7151,15 +7066,14 @@ define <8 x bfloat> @mgather_v8bf16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x bfloat> %
; RV64ZVE32F-NEXT: .LBB63_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB63_16
-; RV64ZVE32F-NEXT: .LBB63_8: # %else20
+; RV64ZVE32F-NEXT: # %bb.8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB63_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB63_2
+; RV64ZVE32F-NEXT: j .LBB63_1
; RV64ZVE32F-NEXT: .LBB63_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
@@ -7167,48 +7081,42 @@ define <8 x bfloat> @mgather_v8bf16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x bfloat> %
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB63_3
+; RV64ZVE32F-NEXT: j .LBB63_2
; RV64ZVE32F-NEXT: .LBB63_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB63_4
+; RV64ZVE32F-NEXT: j .LBB63_3
; RV64ZVE32F-NEXT: .LBB63_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB63_5
+; RV64ZVE32F-NEXT: j .LBB63_4
; RV64ZVE32F-NEXT: .LBB63_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB63_6
+; RV64ZVE32F-NEXT: j .LBB63_5
; RV64ZVE32F-NEXT: .LBB63_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: beqz a2, .LBB63_7
+; RV64ZVE32F-NEXT: j .LBB63_6
; RV64ZVE32F-NEXT: .LBB63_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB63_8
+; RV64ZVE32F-NEXT: j .LBB63_7
; RV64ZVE32F-NEXT: .LBB63_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: lh a0, 0(a0)
@@ -7275,7 +7183,7 @@ define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB64_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB64_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB64_15
; RV64ZVE32F-NEXT: .LBB64_6: # %else8
@@ -7284,7 +7192,7 @@ define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x
; RV64ZVE32F-NEXT: .LBB64_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB64_9
-; RV64ZVE32F-NEXT: .LBB64_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -7336,8 +7244,7 @@ define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB64_6
+; RV64ZVE32F-NEXT: j .LBB64_5
; RV64ZVE32F-NEXT: .LBB64_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -7349,8 +7256,7 @@ define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB64_7
+; RV64ZVE32F-NEXT: j .LBB64_6
; RV64ZVE32F-NEXT: .LBB64_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -7361,9 +7267,7 @@ define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB64_8
-; RV64ZVE32F-NEXT: j .LBB64_9
+; RV64ZVE32F-NEXT: j .LBB64_7
%ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i8> %idxs
%v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
ret <8 x bfloat> %v
@@ -7424,7 +7328,7 @@ define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB65_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB65_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB65_15
; RV64ZVE32F-NEXT: .LBB65_6: # %else8
@@ -7433,7 +7337,7 @@ define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: .LBB65_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB65_9
-; RV64ZVE32F-NEXT: .LBB65_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -7485,8 +7389,7 @@ define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB65_6
+; RV64ZVE32F-NEXT: j .LBB65_5
; RV64ZVE32F-NEXT: .LBB65_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -7498,8 +7401,7 @@ define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB65_7
+; RV64ZVE32F-NEXT: j .LBB65_6
; RV64ZVE32F-NEXT: .LBB65_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -7510,9 +7412,7 @@ define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB65_8
-; RV64ZVE32F-NEXT: j .LBB65_9
+; RV64ZVE32F-NEXT: j .LBB65_7
%eidxs = sext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
%v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
@@ -7574,7 +7474,7 @@ define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB66_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB66_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB66_15
; RV64ZVE32F-NEXT: .LBB66_6: # %else8
@@ -7583,7 +7483,7 @@ define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: .LBB66_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB66_9
-; RV64ZVE32F-NEXT: .LBB66_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -7639,8 +7539,7 @@ define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB66_6
+; RV64ZVE32F-NEXT: j .LBB66_5
; RV64ZVE32F-NEXT: .LBB66_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -7653,8 +7552,7 @@ define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB66_7
+; RV64ZVE32F-NEXT: j .LBB66_6
; RV64ZVE32F-NEXT: .LBB66_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -7666,9 +7564,7 @@ define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB66_8
-; RV64ZVE32F-NEXT: j .LBB66_9
+; RV64ZVE32F-NEXT: j .LBB66_7
%eidxs = zext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
%v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
@@ -7727,7 +7623,7 @@ define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB67_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB67_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB67_15
; RV64ZVE32F-NEXT: .LBB67_6: # %else8
@@ -7736,7 +7632,7 @@ define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: .LBB67_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB67_9
-; RV64ZVE32F-NEXT: .LBB67_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -7784,8 +7680,7 @@ define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB67_6
+; RV64ZVE32F-NEXT: j .LBB67_5
; RV64ZVE32F-NEXT: .LBB67_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -7796,8 +7691,7 @@ define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB67_7
+; RV64ZVE32F-NEXT: j .LBB67_6
; RV64ZVE32F-NEXT: .LBB67_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -7806,9 +7700,7 @@ define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB67_8
-; RV64ZVE32F-NEXT: j .LBB67_9
+; RV64ZVE32F-NEXT: j .LBB67_7
%ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %idxs
%v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
ret <8 x bfloat> %v
@@ -7882,17 +7774,16 @@ define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passt
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-ZVFH-NEXT: andi a3, a2, 1
; RV64ZVE32F-ZVFH-NEXT: bnez a3, .LBB69_3
-; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_1: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_4
-; RV64ZVE32F-ZVFH-NEXT: .LBB69_2: # %else2
+; RV64ZVE32F-ZVFH-NEXT: # %bb.2: # %else2
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB69_3: # %cond.load
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_2
+; RV64ZVE32F-ZVFH-NEXT: j .LBB69_1
; RV64ZVE32F-ZVFH-NEXT: .LBB69_4: # %cond.load1
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a1)
; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
@@ -7907,17 +7798,16 @@ define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passt
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a3, .LBB69_3
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_1: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_4
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_2: # %else2
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.2: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_3: # %cond.load
; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_2
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB69_1
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_4: # %cond.load1
; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a1)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
@@ -7952,7 +7842,7 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_5
-; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_1: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_6
; RV64ZVE32F-ZVFH-NEXT: .LBB70_2: # %else2
@@ -7961,15 +7851,14 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt
; RV64ZVE32F-ZVFH-NEXT: .LBB70_3: # %else5
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB70_8
-; RV64ZVE32F-ZVFH-NEXT: .LBB70_4: # %else8
+; RV64ZVE32F-ZVFH-NEXT: # %bb.4: # %else8
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB70_5: # %cond.load
; RV64ZVE32F-ZVFH-NEXT: ld a2, 0(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_2
+; RV64ZVE32F-ZVFH-NEXT: j .LBB70_1
; RV64ZVE32F-ZVFH-NEXT: .LBB70_6: # %cond.load1
; RV64ZVE32F-ZVFH-NEXT: ld a2, 8(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
@@ -7977,16 +7866,14 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_3
+; RV64ZVE32F-ZVFH-NEXT: j .LBB70_2
; RV64ZVE32F-ZVFH-NEXT: .LBB70_7: # %cond.load4
; RV64ZVE32F-ZVFH-NEXT: ld a2, 16(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 8
-; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB70_4
+; RV64ZVE32F-ZVFH-NEXT: j .LBB70_3
; RV64ZVE32F-ZVFH-NEXT: .LBB70_8: # %cond.load7
; RV64ZVE32F-ZVFH-NEXT: ld a0, 24(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
@@ -8001,7 +7888,7 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_5
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_1: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_2: # %else2
@@ -8010,15 +7897,14 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_3: # %else5
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_8
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_4: # %else8
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.4: # %else8
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_5: # %cond.load
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_2
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_1
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_6: # %cond.load1
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
@@ -8026,16 +7912,14 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_3
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_2
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_7: # %cond.load4
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 16(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB70_4
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_3
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_8: # %cond.load7
; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 24(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
@@ -8142,7 +8026,7 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_9
-; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFH-NEXT: .LBB73_1: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_10
; RV64ZVE32F-ZVFH-NEXT: .LBB73_2: # %else2
@@ -8163,15 +8047,14 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt
; RV64ZVE32F-ZVFH-NEXT: .LBB73_7: # %else17
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB73_16
-; RV64ZVE32F-ZVFH-NEXT: .LBB73_8: # %else20
+; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %else20
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB73_9: # %cond.load
; RV64ZVE32F-ZVFH-NEXT: ld a2, 0(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_2
+; RV64ZVE32F-ZVFH-NEXT: j .LBB73_1
; RV64ZVE32F-ZVFH-NEXT: .LBB73_10: # %cond.load1
; RV64ZVE32F-ZVFH-NEXT: ld a2, 8(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
@@ -8179,48 +8062,42 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_3
+; RV64ZVE32F-ZVFH-NEXT: j .LBB73_2
; RV64ZVE32F-ZVFH-NEXT: .LBB73_11: # %cond.load4
; RV64ZVE32F-ZVFH-NEXT: ld a2, 16(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_4
+; RV64ZVE32F-ZVFH-NEXT: j .LBB73_3
; RV64ZVE32F-ZVFH-NEXT: .LBB73_12: # %cond.load7
; RV64ZVE32F-ZVFH-NEXT: ld a2, 24(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 3
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_5
+; RV64ZVE32F-ZVFH-NEXT: j .LBB73_4
; RV64ZVE32F-ZVFH-NEXT: .LBB73_13: # %cond.load10
; RV64ZVE32F-ZVFH-NEXT: ld a2, 32(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 4
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_6
+; RV64ZVE32F-ZVFH-NEXT: j .LBB73_5
; RV64ZVE32F-ZVFH-NEXT: .LBB73_14: # %cond.load13
; RV64ZVE32F-ZVFH-NEXT: ld a2, 40(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 5
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_7
+; RV64ZVE32F-ZVFH-NEXT: j .LBB73_6
; RV64ZVE32F-ZVFH-NEXT: .LBB73_15: # %cond.load16
; RV64ZVE32F-ZVFH-NEXT: ld a2, 48(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 6
-; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB73_8
+; RV64ZVE32F-ZVFH-NEXT: j .LBB73_7
; RV64ZVE32F-ZVFH-NEXT: .LBB73_16: # %cond.load19
; RV64ZVE32F-ZVFH-NEXT: ld a0, 56(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
@@ -8235,7 +8112,7 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_9
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_1: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_10
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_2: # %else2
@@ -8256,15 +8133,14 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_7: # %else17
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_16
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_8: # %else20
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %else20
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_9: # %cond.load
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_2
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB73_1
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_10: # %cond.load1
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
@@ -8272,48 +8148,42 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_3
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB73_2
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_11: # %cond.load4
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 16(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_4
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB73_3
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_12: # %cond.load7
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 24(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 3
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_5
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB73_4
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_13: # %cond.load10
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 32(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 4
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_6
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB73_5
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_14: # %cond.load13
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 40(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 5
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_7
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB73_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_15: # %cond.load16
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 48(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 6
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB73_8
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB73_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_16: # %cond.load19
; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 56(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
@@ -8380,7 +8250,7 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_14
-; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-ZVFH-NEXT: .LBB74_5: # %else5
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_15
; RV64ZVE32F-ZVFH-NEXT: .LBB74_6: # %else8
@@ -8389,7 +8259,7 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-ZVFH-NEXT: .LBB74_7: # %else11
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_9
-; RV64ZVE32F-ZVFH-NEXT: .LBB74_8: # %cond.load13
+; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
@@ -8441,8 +8311,7 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_6
+; RV64ZVE32F-ZVFH-NEXT: j .LBB74_5
; RV64ZVE32F-ZVFH-NEXT: .LBB74_15: # %cond.load7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
@@ -8454,8 +8323,7 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_7
+; RV64ZVE32F-ZVFH-NEXT: j .LBB74_6
; RV64ZVE32F-ZVFH-NEXT: .LBB74_16: # %cond.load10
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
@@ -8466,9 +8334,7 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_8
-; RV64ZVE32F-ZVFH-NEXT: j .LBB74_9
+; RV64ZVE32F-ZVFH-NEXT: j .LBB74_7
;
; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8i8_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
@@ -8504,7 +8370,7 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_14
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_5: # %else5
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_15
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_6: # %else8
@@ -8513,7 +8379,7 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_7: # %else11
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_9
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_8: # %cond.load13
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
@@ -8565,8 +8431,7 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_6
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB74_5
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_15: # %cond.load7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
@@ -8578,8 +8443,7 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_7
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB74_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_16: # %cond.load10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
@@ -8590,9 +8454,7 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_8
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB74_9
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB74_7
%ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
%v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
ret <8 x half> %v
@@ -8653,7 +8515,7 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_14
-; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-ZVFH-NEXT: .LBB75_5: # %else5
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_15
; RV64ZVE32F-ZVFH-NEXT: .LBB75_6: # %else8
@@ -8662,7 +8524,7 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFH-NEXT: .LBB75_7: # %else11
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_9
-; RV64ZVE32F-ZVFH-NEXT: .LBB75_8: # %cond.load13
+; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
@@ -8714,8 +8576,7 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_6
+; RV64ZVE32F-ZVFH-NEXT: j .LBB75_5
; RV64ZVE32F-ZVFH-NEXT: .LBB75_15: # %cond.load7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
@@ -8727,8 +8588,7 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_7
+; RV64ZVE32F-ZVFH-NEXT: j .LBB75_6
; RV64ZVE32F-ZVFH-NEXT: .LBB75_16: # %cond.load10
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
@@ -8739,9 +8599,7 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_8
-; RV64ZVE32F-ZVFH-NEXT: j .LBB75_9
+; RV64ZVE32F-ZVFH-NEXT: j .LBB75_7
;
; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_sext_v8i8_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
@@ -8777,7 +8635,7 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_14
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_5: # %else5
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_15
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_6: # %else8
@@ -8786,7 +8644,7 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_7: # %else11
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_9
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_8: # %cond.load13
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
@@ -8838,8 +8696,7 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_6
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB75_5
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_15: # %cond.load7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
@@ -8851,8 +8708,7 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_7
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB75_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_16: # %cond.load10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
@@ -8863,9 +8719,7 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_8
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB75_9
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB75_7
%eidxs = sext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
%v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
@@ -8927,7 +8781,7 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_14
-; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-ZVFH-NEXT: .LBB76_5: # %else5
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_15
; RV64ZVE32F-ZVFH-NEXT: .LBB76_6: # %else8
@@ -8936,7 +8790,7 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFH-NEXT: .LBB76_7: # %else11
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_9
-; RV64ZVE32F-ZVFH-NEXT: .LBB76_8: # %cond.load13
+; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
@@ -8992,8 +8846,7 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_6
+; RV64ZVE32F-ZVFH-NEXT: j .LBB76_5
; RV64ZVE32F-ZVFH-NEXT: .LBB76_15: # %cond.load7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
@@ -9006,8 +8859,7 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_7
+; RV64ZVE32F-ZVFH-NEXT: j .LBB76_6
; RV64ZVE32F-ZVFH-NEXT: .LBB76_16: # %cond.load10
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
@@ -9019,9 +8871,7 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_8
-; RV64ZVE32F-ZVFH-NEXT: j .LBB76_9
+; RV64ZVE32F-ZVFH-NEXT: j .LBB76_7
;
; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_zext_v8i8_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
@@ -9059,7 +8909,7 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_14
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_5: # %else5
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_15
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_6: # %else8
@@ -9068,7 +8918,7 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_7: # %else11
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_9
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_8: # %cond.load13
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
@@ -9124,8 +8974,7 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_6
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB76_5
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_15: # %cond.load7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
@@ -9138,8 +8987,7 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_7
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB76_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_16: # %cond.load10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
@@ -9151,9 +8999,7 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_8
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB76_9
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB76_7
%eidxs = zext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
%v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
@@ -9212,7 +9058,7 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_14
-; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-ZVFH-NEXT: .LBB77_5: # %else5
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_15
; RV64ZVE32F-ZVFH-NEXT: .LBB77_6: # %else8
@@ -9221,7 +9067,7 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-ZVFH-NEXT: .LBB77_7: # %else11
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_9
-; RV64ZVE32F-ZVFH-NEXT: .LBB77_8: # %cond.load13
+; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
@@ -9269,8 +9115,7 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_6
+; RV64ZVE32F-ZVFH-NEXT: j .LBB77_5
; RV64ZVE32F-ZVFH-NEXT: .LBB77_15: # %cond.load7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
@@ -9281,8 +9126,7 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_7
+; RV64ZVE32F-ZVFH-NEXT: j .LBB77_6
; RV64ZVE32F-ZVFH-NEXT: .LBB77_16: # %cond.load10
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
@@ -9291,9 +9135,7 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_8
-; RV64ZVE32F-ZVFH-NEXT: j .LBB77_9
+; RV64ZVE32F-ZVFH-NEXT: j .LBB77_7
;
; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
@@ -9328,7 +9170,7 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_14
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_5: # %else5
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_15
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_6: # %else8
@@ -9337,7 +9179,7 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_7: # %else11
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_9
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_8: # %cond.load13
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
@@ -9385,8 +9227,7 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_6
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB77_5
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_15: # %cond.load7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
@@ -9397,8 +9238,7 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_7
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB77_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_16: # %cond.load10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
@@ -9407,9 +9247,7 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_8
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB77_9
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB77_7
%ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
%v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
ret <8 x half> %v
@@ -9483,17 +9321,16 @@ define <2 x float> @mgather_v2f32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x float> %pas
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB79_3
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB79_1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB79_4
-; RV64ZVE32F-NEXT: .LBB79_2: # %else2
+; RV64ZVE32F-NEXT: # %bb.2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB79_3: # %cond.load
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB79_2
+; RV64ZVE32F-NEXT: j .LBB79_1
; RV64ZVE32F-NEXT: .LBB79_4: # %cond.load1
; RV64ZVE32F-NEXT: flw fa5, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -9528,7 +9365,7 @@ define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %pas
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB80_5
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB80_1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB80_6
; RV64ZVE32F-NEXT: .LBB80_2: # %else2
@@ -9537,15 +9374,14 @@ define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %pas
; RV64ZVE32F-NEXT: .LBB80_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB80_8
-; RV64ZVE32F-NEXT: .LBB80_4: # %else8
+; RV64ZVE32F-NEXT: # %bb.4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB80_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB80_2
+; RV64ZVE32F-NEXT: j .LBB80_1
; RV64ZVE32F-NEXT: .LBB80_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
@@ -9553,16 +9389,14 @@ define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %pas
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB80_3
+; RV64ZVE32F-NEXT: j .LBB80_2
; RV64ZVE32F-NEXT: .LBB80_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: andi a1, a1, 8
-; RV64ZVE32F-NEXT: beqz a1, .LBB80_4
+; RV64ZVE32F-NEXT: j .LBB80_3
; RV64ZVE32F-NEXT: .LBB80_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
@@ -9651,7 +9485,7 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB83_9
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB83_1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB83_10
; RV64ZVE32F-NEXT: .LBB83_2: # %else2
@@ -9672,15 +9506,14 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas
; RV64ZVE32F-NEXT: .LBB83_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB83_16
-; RV64ZVE32F-NEXT: .LBB83_8: # %else20
+; RV64ZVE32F-NEXT: # %bb.8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB83_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB83_2
+; RV64ZVE32F-NEXT: j .LBB83_1
; RV64ZVE32F-NEXT: .LBB83_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
@@ -9688,48 +9521,42 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB83_3
+; RV64ZVE32F-NEXT: j .LBB83_2
; RV64ZVE32F-NEXT: .LBB83_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB83_4
+; RV64ZVE32F-NEXT: j .LBB83_3
; RV64ZVE32F-NEXT: .LBB83_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB83_5
+; RV64ZVE32F-NEXT: j .LBB83_4
; RV64ZVE32F-NEXT: .LBB83_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB83_6
+; RV64ZVE32F-NEXT: j .LBB83_5
; RV64ZVE32F-NEXT: .LBB83_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: beqz a2, .LBB83_7
+; RV64ZVE32F-NEXT: j .LBB83_6
; RV64ZVE32F-NEXT: .LBB83_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB83_8
+; RV64ZVE32F-NEXT: j .LBB83_7
; RV64ZVE32F-NEXT: .LBB83_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
@@ -9795,7 +9622,7 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB84_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB84_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB84_15
; RV64ZVE32F-NEXT: .LBB84_6: # %else8
@@ -9804,7 +9631,7 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: .LBB84_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB84_9
-; RV64ZVE32F-NEXT: .LBB84_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -9856,8 +9683,7 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB84_6
+; RV64ZVE32F-NEXT: j .LBB84_5
; RV64ZVE32F-NEXT: .LBB84_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -9869,8 +9695,7 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB84_7
+; RV64ZVE32F-NEXT: j .LBB84_6
; RV64ZVE32F-NEXT: .LBB84_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -9881,9 +9706,7 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB84_8
-; RV64ZVE32F-NEXT: j .LBB84_9
+; RV64ZVE32F-NEXT: j .LBB84_7
%ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
@@ -9943,7 +9766,7 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB85_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB85_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB85_15
; RV64ZVE32F-NEXT: .LBB85_6: # %else8
@@ -9952,7 +9775,7 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: .LBB85_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB85_9
-; RV64ZVE32F-NEXT: .LBB85_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -10004,8 +9827,7 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB85_6
+; RV64ZVE32F-NEXT: j .LBB85_5
; RV64ZVE32F-NEXT: .LBB85_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -10017,8 +9839,7 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB85_7
+; RV64ZVE32F-NEXT: j .LBB85_6
; RV64ZVE32F-NEXT: .LBB85_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -10029,9 +9850,7 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB85_8
-; RV64ZVE32F-NEXT: j .LBB85_9
+; RV64ZVE32F-NEXT: j .LBB85_7
%eidxs = sext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
@@ -10095,7 +9914,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB86_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB86_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB86_15
; RV64ZVE32F-NEXT: .LBB86_6: # %else8
@@ -10104,7 +9923,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: .LBB86_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB86_9
-; RV64ZVE32F-NEXT: .LBB86_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -10160,8 +9979,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB86_6
+; RV64ZVE32F-NEXT: j .LBB86_5
; RV64ZVE32F-NEXT: .LBB86_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -10174,8 +9992,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB86_7
+; RV64ZVE32F-NEXT: j .LBB86_6
; RV64ZVE32F-NEXT: .LBB86_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -10187,9 +10004,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB86_8
-; RV64ZVE32F-NEXT: j .LBB86_9
+; RV64ZVE32F-NEXT: j .LBB86_7
%eidxs = zext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
@@ -10252,7 +10067,7 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB87_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB87_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB87_15
; RV64ZVE32F-NEXT: .LBB87_6: # %else8
@@ -10261,7 +10076,7 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: .LBB87_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB87_9
-; RV64ZVE32F-NEXT: .LBB87_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -10313,8 +10128,7 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB87_6
+; RV64ZVE32F-NEXT: j .LBB87_5
; RV64ZVE32F-NEXT: .LBB87_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -10326,8 +10140,7 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB87_7
+; RV64ZVE32F-NEXT: j .LBB87_6
; RV64ZVE32F-NEXT: .LBB87_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -10338,9 +10151,7 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB87_8
-; RV64ZVE32F-NEXT: j .LBB87_9
+; RV64ZVE32F-NEXT: j .LBB87_7
%ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
@@ -10402,7 +10213,7 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB88_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB88_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB88_15
; RV64ZVE32F-NEXT: .LBB88_6: # %else8
@@ -10411,7 +10222,7 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: .LBB88_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB88_9
-; RV64ZVE32F-NEXT: .LBB88_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -10463,8 +10274,7 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB88_6
+; RV64ZVE32F-NEXT: j .LBB88_5
; RV64ZVE32F-NEXT: .LBB88_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -10476,8 +10286,7 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB88_7
+; RV64ZVE32F-NEXT: j .LBB88_6
; RV64ZVE32F-NEXT: .LBB88_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -10488,9 +10297,7 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB88_8
-; RV64ZVE32F-NEXT: j .LBB88_9
+; RV64ZVE32F-NEXT: j .LBB88_7
%eidxs = sext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
@@ -10555,7 +10362,7 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB89_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB89_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB89_15
; RV64ZVE32F-NEXT: .LBB89_6: # %else8
@@ -10564,7 +10371,7 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: .LBB89_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB89_9
-; RV64ZVE32F-NEXT: .LBB89_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -10620,8 +10427,7 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB89_6
+; RV64ZVE32F-NEXT: j .LBB89_5
; RV64ZVE32F-NEXT: .LBB89_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -10634,8 +10440,7 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB89_7
+; RV64ZVE32F-NEXT: j .LBB89_6
; RV64ZVE32F-NEXT: .LBB89_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -10647,9 +10452,7 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB89_8
-; RV64ZVE32F-NEXT: j .LBB89_9
+; RV64ZVE32F-NEXT: j .LBB89_7
%eidxs = zext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
@@ -10706,7 +10509,7 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB90_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB90_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB90_15
; RV64ZVE32F-NEXT: .LBB90_6: # %else8
@@ -10715,7 +10518,7 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: .LBB90_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB90_9
-; RV64ZVE32F-NEXT: .LBB90_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -10763,8 +10566,7 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB90_6
+; RV64ZVE32F-NEXT: j .LBB90_5
; RV64ZVE32F-NEXT: .LBB90_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -10774,8 +10576,7 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB90_7
+; RV64ZVE32F-NEXT: j .LBB90_6
; RV64ZVE32F-NEXT: .LBB90_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
@@ -10784,9 +10585,7 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB90_8
-; RV64ZVE32F-NEXT: j .LBB90_9
+; RV64ZVE32F-NEXT: j .LBB90_7
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
@@ -10857,17 +10656,16 @@ define <2 x double> @mgather_v2f64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x double> %p
; RV32ZVE32F-NEXT: vmv.x.s a0, v0
; RV32ZVE32F-NEXT: andi a1, a0, 1
; RV32ZVE32F-NEXT: bnez a1, .LBB92_3
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB92_1: # %else
; RV32ZVE32F-NEXT: andi a0, a0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB92_4
-; RV32ZVE32F-NEXT: .LBB92_2: # %else2
+; RV32ZVE32F-NEXT: # %bb.2: # %else2
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB92_3: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
-; RV32ZVE32F-NEXT: andi a0, a0, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB92_2
+; RV32ZVE32F-NEXT: j .LBB92_1
; RV32ZVE32F-NEXT: .LBB92_4: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -10881,15 +10679,14 @@ define <2 x double> @mgather_v2f64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x double> %p
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB92_3
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB92_1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB92_4
-; RV64ZVE32F-NEXT: .LBB92_2: # %else2
+; RV64ZVE32F-NEXT: # %bb.2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB92_3: # %cond.load
; RV64ZVE32F-NEXT: fld fa0, 0(a0)
-; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_2
+; RV64ZVE32F-NEXT: j .LBB92_1
; RV64ZVE32F-NEXT: .LBB92_4: # %cond.load1
; RV64ZVE32F-NEXT: fld fa1, 0(a1)
; RV64ZVE32F-NEXT: ret
@@ -10920,7 +10717,7 @@ define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %p
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB93_6
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB93_1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB93_7
; RV32ZVE32F-NEXT: .LBB93_2: # %else2
@@ -10929,7 +10726,7 @@ define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %p
; RV32ZVE32F-NEXT: .LBB93_3: # %else5
; RV32ZVE32F-NEXT: andi a1, a1, 8
; RV32ZVE32F-NEXT: beqz a1, .LBB93_5
-; RV32ZVE32F-NEXT: .LBB93_4: # %cond.load7
+; RV32ZVE32F-NEXT: # %bb.4: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -10944,23 +10741,19 @@ define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %p
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 2
-; RV32ZVE32F-NEXT: beqz a2, .LBB93_2
+; RV32ZVE32F-NEXT: j .LBB93_1
; RV32ZVE32F-NEXT: .LBB93_7: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 4
-; RV32ZVE32F-NEXT: beqz a2, .LBB93_3
+; RV32ZVE32F-NEXT: j .LBB93_2
; RV32ZVE32F-NEXT: .LBB93_8: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
-; RV32ZVE32F-NEXT: andi a1, a1, 8
-; RV32ZVE32F-NEXT: bnez a1, .LBB93_4
-; RV32ZVE32F-NEXT: j .LBB93_5
+; RV32ZVE32F-NEXT: j .LBB93_3
;
; RV64ZVE32F-LABEL: mgather_v4f64:
; RV64ZVE32F: # %bb.0:
@@ -10968,7 +10761,7 @@ define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %p
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB93_6
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB93_1: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB93_7
; RV64ZVE32F-NEXT: .LBB93_2: # %else2
@@ -10977,7 +10770,7 @@ define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %p
; RV64ZVE32F-NEXT: .LBB93_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a2, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB93_5
-; RV64ZVE32F-NEXT: .LBB93_4: # %cond.load7
+; RV64ZVE32F-NEXT: # %bb.4: # %cond.load7
; RV64ZVE32F-NEXT: ld a1, 24(a1)
; RV64ZVE32F-NEXT: fld fa3, 0(a1)
; RV64ZVE32F-NEXT: .LBB93_5: # %else8
@@ -10989,19 +10782,15 @@ define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %p
; RV64ZVE32F-NEXT: .LBB93_6: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB93_2
+; RV64ZVE32F-NEXT: j .LBB93_1
; RV64ZVE32F-NEXT: .LBB93_7: # %cond.load1
; RV64ZVE32F-NEXT: ld a3, 8(a1)
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 4
-; RV64ZVE32F-NEXT: beqz a3, .LBB93_3
+; RV64ZVE32F-NEXT: j .LBB93_2
; RV64ZVE32F-NEXT: .LBB93_8: # %cond.load4
; RV64ZVE32F-NEXT: ld a3, 16(a1)
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: andi a2, a2, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB93_4
-; RV64ZVE32F-NEXT: j .LBB93_5
+; RV64ZVE32F-NEXT: j .LBB93_3
%v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %m, <4 x double> %passthru)
ret <4 x double> %v
}
@@ -11114,7 +10903,7 @@ define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %p
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB96_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB96_1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB96_11
; RV32ZVE32F-NEXT: .LBB96_2: # %else2
@@ -11135,7 +10924,7 @@ define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %p
; RV32ZVE32F-NEXT: .LBB96_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB96_9
-; RV32ZVE32F-NEXT: .LBB96_8: # %cond.load19
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -11154,51 +10943,43 @@ define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %p
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 2
-; RV32ZVE32F-NEXT: beqz a2, .LBB96_2
+; RV32ZVE32F-NEXT: j .LBB96_1
; RV32ZVE32F-NEXT: .LBB96_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 4
-; RV32ZVE32F-NEXT: beqz a2, .LBB96_3
+; RV32ZVE32F-NEXT: j .LBB96_2
; RV32ZVE32F-NEXT: .LBB96_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 8
-; RV32ZVE32F-NEXT: beqz a2, .LBB96_4
+; RV32ZVE32F-NEXT: j .LBB96_3
; RV32ZVE32F-NEXT: .LBB96_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 16
-; RV32ZVE32F-NEXT: beqz a2, .LBB96_5
+; RV32ZVE32F-NEXT: j .LBB96_4
; RV32ZVE32F-NEXT: .LBB96_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 32
-; RV32ZVE32F-NEXT: beqz a2, .LBB96_6
+; RV32ZVE32F-NEXT: j .LBB96_5
; RV32ZVE32F-NEXT: .LBB96_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 64
-; RV32ZVE32F-NEXT: beqz a2, .LBB96_7
+; RV32ZVE32F-NEXT: j .LBB96_6
; RV32ZVE32F-NEXT: .LBB96_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
-; RV32ZVE32F-NEXT: andi a1, a1, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB96_8
-; RV32ZVE32F-NEXT: j .LBB96_9
+; RV32ZVE32F-NEXT: j .LBB96_7
;
; RV64ZVE32F-LABEL: mgather_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -11206,7 +10987,7 @@ define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %p
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB96_10
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB96_1: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB96_11
; RV64ZVE32F-NEXT: .LBB96_2: # %else2
@@ -11227,7 +11008,7 @@ define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %p
; RV64ZVE32F-NEXT: .LBB96_7: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB96_9
-; RV64ZVE32F-NEXT: .LBB96_8: # %cond.load19
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load19
; RV64ZVE32F-NEXT: ld a1, 56(a1)
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB96_9: # %else20
@@ -11243,39 +11024,31 @@ define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %p
; RV64ZVE32F-NEXT: .LBB96_10: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB96_2
+; RV64ZVE32F-NEXT: j .LBB96_1
; RV64ZVE32F-NEXT: .LBB96_11: # %cond.load1
; RV64ZVE32F-NEXT: ld a3, 8(a1)
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 4
-; RV64ZVE32F-NEXT: beqz a3, .LBB96_3
+; RV64ZVE32F-NEXT: j .LBB96_2
; RV64ZVE32F-NEXT: .LBB96_12: # %cond.load4
; RV64ZVE32F-NEXT: ld a3, 16(a1)
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB96_4
+; RV64ZVE32F-NEXT: j .LBB96_3
; RV64ZVE32F-NEXT: .LBB96_13: # %cond.load7
; RV64ZVE32F-NEXT: ld a3, 24(a1)
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB96_5
+; RV64ZVE32F-NEXT: j .LBB96_4
; RV64ZVE32F-NEXT: .LBB96_14: # %cond.load10
; RV64ZVE32F-NEXT: ld a3, 32(a1)
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB96_6
+; RV64ZVE32F-NEXT: j .LBB96_5
; RV64ZVE32F-NEXT: .LBB96_15: # %cond.load13
; RV64ZVE32F-NEXT: ld a3, 40(a1)
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 64
-; RV64ZVE32F-NEXT: beqz a3, .LBB96_7
+; RV64ZVE32F-NEXT: j .LBB96_6
; RV64ZVE32F-NEXT: .LBB96_16: # %cond.load16
; RV64ZVE32F-NEXT: ld a3, 48(a1)
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
-; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: bnez a2, .LBB96_8
-; RV64ZVE32F-NEXT: j .LBB96_9
+; RV64ZVE32F-NEXT: j .LBB96_7
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
@@ -11311,7 +11084,7 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x
; RV32ZVE32F-NEXT: andi a3, a2, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez a3, .LBB97_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB97_1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB97_11
; RV32ZVE32F-NEXT: .LBB97_2: # %else2
@@ -11332,7 +11105,7 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x
; RV32ZVE32F-NEXT: .LBB97_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB97_9
-; RV32ZVE32F-NEXT: .LBB97_8: # %cond.load19
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -11350,51 +11123,43 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x
; RV32ZVE32F-NEXT: .LBB97_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB97_2
+; RV32ZVE32F-NEXT: j .LBB97_1
; RV32ZVE32F-NEXT: .LBB97_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB97_3
+; RV32ZVE32F-NEXT: j .LBB97_2
; RV32ZVE32F-NEXT: .LBB97_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB97_4
+; RV32ZVE32F-NEXT: j .LBB97_3
; RV32ZVE32F-NEXT: .LBB97_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB97_5
+; RV32ZVE32F-NEXT: j .LBB97_4
; RV32ZVE32F-NEXT: .LBB97_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB97_6
+; RV32ZVE32F-NEXT: j .LBB97_5
; RV32ZVE32F-NEXT: .LBB97_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB97_7
+; RV32ZVE32F-NEXT: j .LBB97_6
; RV32ZVE32F-NEXT: .LBB97_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB97_8
-; RV32ZVE32F-NEXT: j .LBB97_9
+; RV32ZVE32F-NEXT: j .LBB97_7
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -11424,7 +11189,7 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB97_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB97_5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB97_15
; RV64ZVE32F-NEXT: .LBB97_6: # %else8
@@ -11433,7 +11198,7 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x
; RV64ZVE32F-NEXT: .LBB97_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB97_9
-; RV64ZVE32F-NEXT: .LBB97_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
@@ -11472,24 +11237,20 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB97_6
+; RV64ZVE32F-NEXT: j .LBB97_5
; RV64ZVE32F-NEXT: .LBB97_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB97_7
+; RV64ZVE32F-NEXT: j .LBB97_6
; RV64ZVE32F-NEXT: .LBB97_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB97_8
-; RV64ZVE32F-NEXT: j .LBB97_9
+; RV64ZVE32F-NEXT: j .LBB97_7
%ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
@@ -11526,7 +11287,7 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV32ZVE32F-NEXT: andi a3, a2, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez a3, .LBB98_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB98_1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB98_11
; RV32ZVE32F-NEXT: .LBB98_2: # %else2
@@ -11547,7 +11308,7 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV32ZVE32F-NEXT: .LBB98_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB98_9
-; RV32ZVE32F-NEXT: .LBB98_8: # %cond.load19
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -11565,51 +11326,43 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV32ZVE32F-NEXT: .LBB98_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB98_2
+; RV32ZVE32F-NEXT: j .LBB98_1
; RV32ZVE32F-NEXT: .LBB98_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB98_3
+; RV32ZVE32F-NEXT: j .LBB98_2
; RV32ZVE32F-NEXT: .LBB98_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB98_4
+; RV32ZVE32F-NEXT: j .LBB98_3
; RV32ZVE32F-NEXT: .LBB98_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB98_5
+; RV32ZVE32F-NEXT: j .LBB98_4
; RV32ZVE32F-NEXT: .LBB98_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB98_6
+; RV32ZVE32F-NEXT: j .LBB98_5
; RV32ZVE32F-NEXT: .LBB98_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB98_7
+; RV32ZVE32F-NEXT: j .LBB98_6
; RV32ZVE32F-NEXT: .LBB98_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB98_8
-; RV32ZVE32F-NEXT: j .LBB98_9
+; RV32ZVE32F-NEXT: j .LBB98_7
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -11639,7 +11392,7 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB98_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB98_5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB98_15
; RV64ZVE32F-NEXT: .LBB98_6: # %else8
@@ -11648,7 +11401,7 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: .LBB98_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB98_9
-; RV64ZVE32F-NEXT: .LBB98_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
@@ -11687,24 +11440,20 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB98_6
+; RV64ZVE32F-NEXT: j .LBB98_5
; RV64ZVE32F-NEXT: .LBB98_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB98_7
+; RV64ZVE32F-NEXT: j .LBB98_6
; RV64ZVE32F-NEXT: .LBB98_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB98_8
-; RV64ZVE32F-NEXT: j .LBB98_9
+; RV64ZVE32F-NEXT: j .LBB98_7
%eidxs = sext <8 x i8> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -11743,7 +11492,7 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV32ZVE32F-NEXT: andi a3, a2, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez a3, .LBB99_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB99_1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB99_11
; RV32ZVE32F-NEXT: .LBB99_2: # %else2
@@ -11764,7 +11513,7 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV32ZVE32F-NEXT: .LBB99_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB99_9
-; RV32ZVE32F-NEXT: .LBB99_8: # %cond.load19
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -11782,51 +11531,43 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV32ZVE32F-NEXT: .LBB99_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB99_2
+; RV32ZVE32F-NEXT: j .LBB99_1
; RV32ZVE32F-NEXT: .LBB99_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB99_3
+; RV32ZVE32F-NEXT: j .LBB99_2
; RV32ZVE32F-NEXT: .LBB99_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB99_4
+; RV32ZVE32F-NEXT: j .LBB99_3
; RV32ZVE32F-NEXT: .LBB99_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB99_5
+; RV32ZVE32F-NEXT: j .LBB99_4
; RV32ZVE32F-NEXT: .LBB99_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB99_6
+; RV32ZVE32F-NEXT: j .LBB99_5
; RV32ZVE32F-NEXT: .LBB99_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB99_7
+; RV32ZVE32F-NEXT: j .LBB99_6
; RV32ZVE32F-NEXT: .LBB99_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB99_8
-; RV32ZVE32F-NEXT: j .LBB99_9
+; RV32ZVE32F-NEXT: j .LBB99_7
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -11858,7 +11599,7 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB99_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB99_5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB99_15
; RV64ZVE32F-NEXT: .LBB99_6: # %else8
@@ -11867,7 +11608,7 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: .LBB99_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB99_9
-; RV64ZVE32F-NEXT: .LBB99_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
@@ -11910,8 +11651,7 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB99_6
+; RV64ZVE32F-NEXT: j .LBB99_5
; RV64ZVE32F-NEXT: .LBB99_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
@@ -11919,17 +11659,14 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB99_7
+; RV64ZVE32F-NEXT: j .LBB99_6
; RV64ZVE32F-NEXT: .LBB99_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB99_8
-; RV64ZVE32F-NEXT: j .LBB99_9
+; RV64ZVE32F-NEXT: j .LBB99_7
%eidxs = zext <8 x i8> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -11967,7 +11704,7 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccus.vx v10, a2, v8
; RV32ZVE32F-NEXT: bnez a3, .LBB100_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB100_1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB100_11
; RV32ZVE32F-NEXT: .LBB100_2: # %else2
@@ -11988,7 +11725,7 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8
; RV32ZVE32F-NEXT: .LBB100_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB100_9
-; RV32ZVE32F-NEXT: .LBB100_8: # %cond.load19
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -12007,51 +11744,43 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 2
-; RV32ZVE32F-NEXT: beqz a2, .LBB100_2
+; RV32ZVE32F-NEXT: j .LBB100_1
; RV32ZVE32F-NEXT: .LBB100_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 4
-; RV32ZVE32F-NEXT: beqz a2, .LBB100_3
+; RV32ZVE32F-NEXT: j .LBB100_2
; RV32ZVE32F-NEXT: .LBB100_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 8
-; RV32ZVE32F-NEXT: beqz a2, .LBB100_4
+; RV32ZVE32F-NEXT: j .LBB100_3
; RV32ZVE32F-NEXT: .LBB100_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 16
-; RV32ZVE32F-NEXT: beqz a2, .LBB100_5
+; RV32ZVE32F-NEXT: j .LBB100_4
; RV32ZVE32F-NEXT: .LBB100_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 32
-; RV32ZVE32F-NEXT: beqz a2, .LBB100_6
+; RV32ZVE32F-NEXT: j .LBB100_5
; RV32ZVE32F-NEXT: .LBB100_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 64
-; RV32ZVE32F-NEXT: beqz a2, .LBB100_7
+; RV32ZVE32F-NEXT: j .LBB100_6
; RV32ZVE32F-NEXT: .LBB100_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
-; RV32ZVE32F-NEXT: andi a1, a1, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB100_8
-; RV32ZVE32F-NEXT: j .LBB100_9
+; RV32ZVE32F-NEXT: j .LBB100_7
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -12082,7 +11811,7 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB100_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB100_5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB100_15
; RV64ZVE32F-NEXT: .LBB100_6: # %else8
@@ -12091,7 +11820,7 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8
; RV64ZVE32F-NEXT: .LBB100_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB100_9
-; RV64ZVE32F-NEXT: .LBB100_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
@@ -12130,24 +11859,20 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB100_6
+; RV64ZVE32F-NEXT: j .LBB100_5
; RV64ZVE32F-NEXT: .LBB100_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB100_7
+; RV64ZVE32F-NEXT: j .LBB100_6
; RV64ZVE32F-NEXT: .LBB100_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB100_8
-; RV64ZVE32F-NEXT: j .LBB100_9
+; RV64ZVE32F-NEXT: j .LBB100_7
%ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
@@ -12184,7 +11909,7 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccus.vx v10, a2, v8
; RV32ZVE32F-NEXT: bnez a3, .LBB101_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB101_1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB101_11
; RV32ZVE32F-NEXT: .LBB101_2: # %else2
@@ -12205,7 +11930,7 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV32ZVE32F-NEXT: .LBB101_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB101_9
-; RV32ZVE32F-NEXT: .LBB101_8: # %cond.load19
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -12224,51 +11949,43 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 2
-; RV32ZVE32F-NEXT: beqz a2, .LBB101_2
+; RV32ZVE32F-NEXT: j .LBB101_1
; RV32ZVE32F-NEXT: .LBB101_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 4
-; RV32ZVE32F-NEXT: beqz a2, .LBB101_3
+; RV32ZVE32F-NEXT: j .LBB101_2
; RV32ZVE32F-NEXT: .LBB101_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 8
-; RV32ZVE32F-NEXT: beqz a2, .LBB101_4
+; RV32ZVE32F-NEXT: j .LBB101_3
; RV32ZVE32F-NEXT: .LBB101_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 16
-; RV32ZVE32F-NEXT: beqz a2, .LBB101_5
+; RV32ZVE32F-NEXT: j .LBB101_4
; RV32ZVE32F-NEXT: .LBB101_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 32
-; RV32ZVE32F-NEXT: beqz a2, .LBB101_6
+; RV32ZVE32F-NEXT: j .LBB101_5
; RV32ZVE32F-NEXT: .LBB101_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 64
-; RV32ZVE32F-NEXT: beqz a2, .LBB101_7
+; RV32ZVE32F-NEXT: j .LBB101_6
; RV32ZVE32F-NEXT: .LBB101_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
-; RV32ZVE32F-NEXT: andi a1, a1, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB101_8
-; RV32ZVE32F-NEXT: j .LBB101_9
+; RV32ZVE32F-NEXT: j .LBB101_7
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -12299,7 +12016,7 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB101_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB101_5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB101_15
; RV64ZVE32F-NEXT: .LBB101_6: # %else8
@@ -12308,7 +12025,7 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: .LBB101_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB101_9
-; RV64ZVE32F-NEXT: .LBB101_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
@@ -12347,24 +12064,20 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB101_6
+; RV64ZVE32F-NEXT: j .LBB101_5
; RV64ZVE32F-NEXT: .LBB101_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB101_7
+; RV64ZVE32F-NEXT: j .LBB101_6
; RV64ZVE32F-NEXT: .LBB101_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB101_8
-; RV64ZVE32F-NEXT: j .LBB101_9
+; RV64ZVE32F-NEXT: j .LBB101_7
%eidxs = sext <8 x i16> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -12403,7 +12116,7 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccu.vx v10, a2, v8
; RV32ZVE32F-NEXT: bnez a3, .LBB102_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB102_1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB102_11
; RV32ZVE32F-NEXT: .LBB102_2: # %else2
@@ -12424,7 +12137,7 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV32ZVE32F-NEXT: .LBB102_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB102_9
-; RV32ZVE32F-NEXT: .LBB102_8: # %cond.load19
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -12443,51 +12156,43 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 2
-; RV32ZVE32F-NEXT: beqz a2, .LBB102_2
+; RV32ZVE32F-NEXT: j .LBB102_1
; RV32ZVE32F-NEXT: .LBB102_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 4
-; RV32ZVE32F-NEXT: beqz a2, .LBB102_3
+; RV32ZVE32F-NEXT: j .LBB102_2
; RV32ZVE32F-NEXT: .LBB102_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 8
-; RV32ZVE32F-NEXT: beqz a2, .LBB102_4
+; RV32ZVE32F-NEXT: j .LBB102_3
; RV32ZVE32F-NEXT: .LBB102_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 16
-; RV32ZVE32F-NEXT: beqz a2, .LBB102_5
+; RV32ZVE32F-NEXT: j .LBB102_4
; RV32ZVE32F-NEXT: .LBB102_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 32
-; RV32ZVE32F-NEXT: beqz a2, .LBB102_6
+; RV32ZVE32F-NEXT: j .LBB102_5
; RV32ZVE32F-NEXT: .LBB102_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 64
-; RV32ZVE32F-NEXT: beqz a2, .LBB102_7
+; RV32ZVE32F-NEXT: j .LBB102_6
; RV32ZVE32F-NEXT: .LBB102_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
-; RV32ZVE32F-NEXT: andi a1, a1, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB102_8
-; RV32ZVE32F-NEXT: j .LBB102_9
+; RV32ZVE32F-NEXT: j .LBB102_7
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -12520,7 +12225,7 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB102_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB102_5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB102_15
; RV64ZVE32F-NEXT: .LBB102_6: # %else8
@@ -12529,7 +12234,7 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: .LBB102_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB102_9
-; RV64ZVE32F-NEXT: .LBB102_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 48
@@ -12572,8 +12277,7 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: srli a3, a3, 45
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB102_6
+; RV64ZVE32F-NEXT: j .LBB102_5
; RV64ZVE32F-NEXT: .LBB102_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
@@ -12581,17 +12285,14 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: srli a3, a3, 45
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB102_7
+; RV64ZVE32F-NEXT: j .LBB102_6
; RV64ZVE32F-NEXT: .LBB102_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 48
; RV64ZVE32F-NEXT: srli a3, a3, 45
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB102_8
-; RV64ZVE32F-NEXT: j .LBB102_9
+; RV64ZVE32F-NEXT: j .LBB102_7
%eidxs = zext <8 x i16> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -12628,7 +12329,7 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez a3, .LBB103_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB103_1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB103_11
; RV32ZVE32F-NEXT: .LBB103_2: # %else2
@@ -12649,7 +12350,7 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8
; RV32ZVE32F-NEXT: .LBB103_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB103_9
-; RV32ZVE32F-NEXT: .LBB103_8: # %cond.load19
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -12667,51 +12368,43 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8
; RV32ZVE32F-NEXT: .LBB103_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB103_2
+; RV32ZVE32F-NEXT: j .LBB103_1
; RV32ZVE32F-NEXT: .LBB103_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB103_3
+; RV32ZVE32F-NEXT: j .LBB103_2
; RV32ZVE32F-NEXT: .LBB103_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB103_4
+; RV32ZVE32F-NEXT: j .LBB103_3
; RV32ZVE32F-NEXT: .LBB103_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB103_5
+; RV32ZVE32F-NEXT: j .LBB103_4
; RV32ZVE32F-NEXT: .LBB103_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB103_6
+; RV32ZVE32F-NEXT: j .LBB103_5
; RV32ZVE32F-NEXT: .LBB103_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB103_7
+; RV32ZVE32F-NEXT: j .LBB103_6
; RV32ZVE32F-NEXT: .LBB103_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB103_8
-; RV32ZVE32F-NEXT: j .LBB103_9
+; RV32ZVE32F-NEXT: j .LBB103_7
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i32_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -12742,7 +12435,7 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB103_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB103_5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB103_15
; RV64ZVE32F-NEXT: .LBB103_6: # %else8
@@ -12751,7 +12444,7 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8
; RV64ZVE32F-NEXT: .LBB103_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB103_9
-; RV64ZVE32F-NEXT: .LBB103_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
@@ -12790,24 +12483,20 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB103_6
+; RV64ZVE32F-NEXT: j .LBB103_5
; RV64ZVE32F-NEXT: .LBB103_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB103_7
+; RV64ZVE32F-NEXT: j .LBB103_6
; RV64ZVE32F-NEXT: .LBB103_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB103_8
-; RV64ZVE32F-NEXT: j .LBB103_9
+; RV64ZVE32F-NEXT: j .LBB103_7
%ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
@@ -12843,7 +12532,7 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez a3, .LBB104_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB104_1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB104_11
; RV32ZVE32F-NEXT: .LBB104_2: # %else2
@@ -12864,7 +12553,7 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV32ZVE32F-NEXT: .LBB104_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB104_9
-; RV32ZVE32F-NEXT: .LBB104_8: # %cond.load19
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -12882,51 +12571,43 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV32ZVE32F-NEXT: .LBB104_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB104_2
+; RV32ZVE32F-NEXT: j .LBB104_1
; RV32ZVE32F-NEXT: .LBB104_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB104_3
+; RV32ZVE32F-NEXT: j .LBB104_2
; RV32ZVE32F-NEXT: .LBB104_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB104_4
+; RV32ZVE32F-NEXT: j .LBB104_3
; RV32ZVE32F-NEXT: .LBB104_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB104_5
+; RV32ZVE32F-NEXT: j .LBB104_4
; RV32ZVE32F-NEXT: .LBB104_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB104_6
+; RV32ZVE32F-NEXT: j .LBB104_5
; RV32ZVE32F-NEXT: .LBB104_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB104_7
+; RV32ZVE32F-NEXT: j .LBB104_6
; RV32ZVE32F-NEXT: .LBB104_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB104_8
-; RV32ZVE32F-NEXT: j .LBB104_9
+; RV32ZVE32F-NEXT: j .LBB104_7
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -12957,7 +12638,7 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB104_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB104_5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB104_15
; RV64ZVE32F-NEXT: .LBB104_6: # %else8
@@ -12966,7 +12647,7 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: .LBB104_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB104_9
-; RV64ZVE32F-NEXT: .LBB104_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
@@ -13005,24 +12686,20 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB104_6
+; RV64ZVE32F-NEXT: j .LBB104_5
; RV64ZVE32F-NEXT: .LBB104_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB104_7
+; RV64ZVE32F-NEXT: j .LBB104_6
; RV64ZVE32F-NEXT: .LBB104_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB104_8
-; RV64ZVE32F-NEXT: j .LBB104_9
+; RV64ZVE32F-NEXT: j .LBB104_7
%eidxs = sext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -13059,7 +12736,7 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez a3, .LBB105_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB105_1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB105_11
; RV32ZVE32F-NEXT: .LBB105_2: # %else2
@@ -13080,7 +12757,7 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV32ZVE32F-NEXT: .LBB105_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB105_9
-; RV32ZVE32F-NEXT: .LBB105_8: # %cond.load19
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -13098,51 +12775,43 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV32ZVE32F-NEXT: .LBB105_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB105_2
+; RV32ZVE32F-NEXT: j .LBB105_1
; RV32ZVE32F-NEXT: .LBB105_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB105_3
+; RV32ZVE32F-NEXT: j .LBB105_2
; RV32ZVE32F-NEXT: .LBB105_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB105_4
+; RV32ZVE32F-NEXT: j .LBB105_3
; RV32ZVE32F-NEXT: .LBB105_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB105_5
+; RV32ZVE32F-NEXT: j .LBB105_4
; RV32ZVE32F-NEXT: .LBB105_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB105_6
+; RV32ZVE32F-NEXT: j .LBB105_5
; RV32ZVE32F-NEXT: .LBB105_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB105_7
+; RV32ZVE32F-NEXT: j .LBB105_6
; RV32ZVE32F-NEXT: .LBB105_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB105_8
-; RV32ZVE32F-NEXT: j .LBB105_9
+; RV32ZVE32F-NEXT: j .LBB105_7
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -13175,7 +12844,7 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB105_14
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB105_5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB105_15
; RV64ZVE32F-NEXT: .LBB105_6: # %else8
@@ -13184,7 +12853,7 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: .LBB105_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB105_9
-; RV64ZVE32F-NEXT: .LBB105_8: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 32
@@ -13227,8 +12896,7 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB105_6
+; RV64ZVE32F-NEXT: j .LBB105_5
; RV64ZVE32F-NEXT: .LBB105_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
@@ -13236,17 +12904,14 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB105_7
+; RV64ZVE32F-NEXT: j .LBB105_6
; RV64ZVE32F-NEXT: .LBB105_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB105_8
-; RV64ZVE32F-NEXT: j .LBB105_9
+; RV64ZVE32F-NEXT: j .LBB105_7
%eidxs = zext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -13298,7 +12963,7 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
; RV32ZVE32F-NEXT: andi a3, a2, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez a3, .LBB106_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB106_1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB106_11
; RV32ZVE32F-NEXT: .LBB106_2: # %else2
@@ -13319,7 +12984,7 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
; RV32ZVE32F-NEXT: .LBB106_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB106_9
-; RV32ZVE32F-NEXT: .LBB106_8: # %cond.load19
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -13337,51 +13002,43 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
; RV32ZVE32F-NEXT: .LBB106_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB106_2
+; RV32ZVE32F-NEXT: j .LBB106_1
; RV32ZVE32F-NEXT: .LBB106_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB106_3
+; RV32ZVE32F-NEXT: j .LBB106_2
; RV32ZVE32F-NEXT: .LBB106_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB106_4
+; RV32ZVE32F-NEXT: j .LBB106_3
; RV32ZVE32F-NEXT: .LBB106_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB106_5
+; RV32ZVE32F-NEXT: j .LBB106_4
; RV32ZVE32F-NEXT: .LBB106_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB106_6
+; RV32ZVE32F-NEXT: j .LBB106_5
; RV32ZVE32F-NEXT: .LBB106_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB106_7
+; RV32ZVE32F-NEXT: j .LBB106_6
; RV32ZVE32F-NEXT: .LBB106_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB106_8
-; RV32ZVE32F-NEXT: j .LBB106_9
+; RV32ZVE32F-NEXT: j .LBB106_7
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -13389,7 +13046,7 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a4, a3, 1
; RV64ZVE32F-NEXT: bnez a4, .LBB106_10
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB106_1: # %else
; RV64ZVE32F-NEXT: andi a4, a3, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB106_11
; RV64ZVE32F-NEXT: .LBB106_2: # %else2
@@ -13410,7 +13067,7 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: .LBB106_7: # %else17
; RV64ZVE32F-NEXT: andi a3, a3, -128
; RV64ZVE32F-NEXT: beqz a3, .LBB106_9
-; RV64ZVE32F-NEXT: .LBB106_8: # %cond.load19
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.load19
; RV64ZVE32F-NEXT: ld a2, 56(a2)
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
@@ -13430,51 +13087,43 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa0, 0(a4)
-; RV64ZVE32F-NEXT: andi a4, a3, 2
-; RV64ZVE32F-NEXT: beqz a4, .LBB106_2
+; RV64ZVE32F-NEXT: j .LBB106_1
; RV64ZVE32F-NEXT: .LBB106_11: # %cond.load1
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa1, 0(a4)
-; RV64ZVE32F-NEXT: andi a4, a3, 4
-; RV64ZVE32F-NEXT: beqz a4, .LBB106_3
+; RV64ZVE32F-NEXT: j .LBB106_2
; RV64ZVE32F-NEXT: .LBB106_12: # %cond.load4
; RV64ZVE32F-NEXT: ld a4, 16(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa2, 0(a4)
-; RV64ZVE32F-NEXT: andi a4, a3, 8
-; RV64ZVE32F-NEXT: beqz a4, .LBB106_4
+; RV64ZVE32F-NEXT: j .LBB106_3
; RV64ZVE32F-NEXT: .LBB106_13: # %cond.load7
; RV64ZVE32F-NEXT: ld a4, 24(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa3, 0(a4)
-; RV64ZVE32F-NEXT: andi a4, a3, 16
-; RV64ZVE32F-NEXT: beqz a4, .LBB106_5
+; RV64ZVE32F-NEXT: j .LBB106_4
; RV64ZVE32F-NEXT: .LBB106_14: # %cond.load10
; RV64ZVE32F-NEXT: ld a4, 32(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa4, 0(a4)
-; RV64ZVE32F-NEXT: andi a4, a3, 32
-; RV64ZVE32F-NEXT: beqz a4, .LBB106_6
+; RV64ZVE32F-NEXT: j .LBB106_5
; RV64ZVE32F-NEXT: .LBB106_15: # %cond.load13
; RV64ZVE32F-NEXT: ld a4, 40(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa5, 0(a4)
-; RV64ZVE32F-NEXT: andi a4, a3, 64
-; RV64ZVE32F-NEXT: beqz a4, .LBB106_7
+; RV64ZVE32F-NEXT: j .LBB106_6
; RV64ZVE32F-NEXT: .LBB106_16: # %cond.load16
; RV64ZVE32F-NEXT: ld a4, 48(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa6, 0(a4)
-; RV64ZVE32F-NEXT: andi a3, a3, -128
-; RV64ZVE32F-NEXT: bnez a3, .LBB106_8
-; RV64ZVE32F-NEXT: j .LBB106_9
+; RV64ZVE32F-NEXT: j .LBB106_7
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
@@ -13532,13 +13181,13 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB107_25
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB107_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB107_26
; RV64ZVE32F-NEXT: .LBB107_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB107_8
-; RV64ZVE32F-NEXT: .LBB107_7: # %cond.load10
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -13564,7 +13213,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB107_27
-; RV64ZVE32F-NEXT: # %bb.11: # %else17
+; RV64ZVE32F-NEXT: .LBB107_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: bnez a2, .LBB107_28
; RV64ZVE32F-NEXT: .LBB107_12: # %else20
@@ -13573,7 +13222,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: .LBB107_13: # %else23
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: beqz a2, .LBB107_15
-; RV64ZVE32F-NEXT: .LBB107_14: # %cond.load25
+; RV64ZVE32F-NEXT: # %bb.14: # %cond.load25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -13589,7 +13238,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB107_30
-; RV64ZVE32F-NEXT: # %bb.16: # %else29
+; RV64ZVE32F-NEXT: .LBB107_16: # %else29
; RV64ZVE32F-NEXT: slli a2, a1, 52
; RV64ZVE32F-NEXT: bltz a2, .LBB107_31
; RV64ZVE32F-NEXT: .LBB107_17: # %else32
@@ -13598,7 +13247,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: .LBB107_18: # %else35
; RV64ZVE32F-NEXT: slli a2, a1, 50
; RV64ZVE32F-NEXT: bgez a2, .LBB107_20
-; RV64ZVE32F-NEXT: .LBB107_19: # %cond.load37
+; RV64ZVE32F-NEXT: # %bb.19: # %cond.load37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -13643,8 +13292,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v12, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB107_6
+; RV64ZVE32F-NEXT: j .LBB107_5
; RV64ZVE32F-NEXT: .LBB107_26: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
@@ -13654,9 +13302,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB107_7
-; RV64ZVE32F-NEXT: j .LBB107_8
+; RV64ZVE32F-NEXT: j .LBB107_6
; RV64ZVE32F-NEXT: .LBB107_27: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -13664,8 +13310,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 6
-; RV64ZVE32F-NEXT: andi a2, a1, 128
-; RV64ZVE32F-NEXT: beqz a2, .LBB107_12
+; RV64ZVE32F-NEXT: j .LBB107_11
; RV64ZVE32F-NEXT: .LBB107_28: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -13675,8 +13320,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 7
-; RV64ZVE32F-NEXT: andi a2, a1, 256
-; RV64ZVE32F-NEXT: beqz a2, .LBB107_13
+; RV64ZVE32F-NEXT: j .LBB107_12
; RV64ZVE32F-NEXT: .LBB107_29: # %cond.load22
; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -13684,9 +13328,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 8
-; RV64ZVE32F-NEXT: andi a2, a1, 512
-; RV64ZVE32F-NEXT: bnez a2, .LBB107_14
-; RV64ZVE32F-NEXT: j .LBB107_15
+; RV64ZVE32F-NEXT: j .LBB107_13
; RV64ZVE32F-NEXT: .LBB107_30: # %cond.load28
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -13694,8 +13336,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 10
-; RV64ZVE32F-NEXT: slli a2, a1, 52
-; RV64ZVE32F-NEXT: bgez a2, .LBB107_17
+; RV64ZVE32F-NEXT: j .LBB107_16
; RV64ZVE32F-NEXT: .LBB107_31: # %cond.load31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -13705,8 +13346,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 11
-; RV64ZVE32F-NEXT: slli a2, a1, 51
-; RV64ZVE32F-NEXT: bgez a2, .LBB107_18
+; RV64ZVE32F-NEXT: j .LBB107_17
; RV64ZVE32F-NEXT: .LBB107_32: # %cond.load34
; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -13714,9 +13354,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 12
-; RV64ZVE32F-NEXT: slli a2, a1, 50
-; RV64ZVE32F-NEXT: bltz a2, .LBB107_19
-; RV64ZVE32F-NEXT: j .LBB107_20
+; RV64ZVE32F-NEXT: j .LBB107_18
%ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %idxs
%v = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs, i32 2, <16 x i1> %m, <16 x i8> %passthru)
ret <16 x i8> %v
@@ -13787,13 +13425,13 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB108_49
-; RV64ZVE32F-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-NEXT: .LBB108_5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB108_50
; RV64ZVE32F-NEXT: .LBB108_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB108_8
-; RV64ZVE32F-NEXT: .LBB108_7: # %cond.load10
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -13819,7 +13457,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB108_51
-; RV64ZVE32F-NEXT: # %bb.11: # %else17
+; RV64ZVE32F-NEXT: .LBB108_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: bnez a2, .LBB108_52
; RV64ZVE32F-NEXT: .LBB108_12: # %else20
@@ -13828,7 +13466,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: .LBB108_13: # %else23
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: beqz a2, .LBB108_15
-; RV64ZVE32F-NEXT: .LBB108_14: # %cond.load25
+; RV64ZVE32F-NEXT: # %bb.14: # %cond.load25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
@@ -13892,7 +13530,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v13, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB108_54
-; RV64ZVE32F-NEXT: # %bb.24: # %else41
+; RV64ZVE32F-NEXT: .LBB108_24: # %else41
; RV64ZVE32F-NEXT: slli a2, a1, 48
; RV64ZVE32F-NEXT: bltz a2, .LBB108_55
; RV64ZVE32F-NEXT: .LBB108_25: # %else44
@@ -13901,7 +13539,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: .LBB108_26: # %else47
; RV64ZVE32F-NEXT: slli a2, a1, 46
; RV64ZVE32F-NEXT: bgez a2, .LBB108_28
-; RV64ZVE32F-NEXT: .LBB108_27: # %cond.load49
+; RV64ZVE32F-NEXT: # %bb.27: # %cond.load49
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -13917,13 +13555,13 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB108_57
-; RV64ZVE32F-NEXT: # %bb.29: # %else53
+; RV64ZVE32F-NEXT: .LBB108_29: # %else53
; RV64ZVE32F-NEXT: slli a2, a1, 44
; RV64ZVE32F-NEXT: bltz a2, .LBB108_58
; RV64ZVE32F-NEXT: .LBB108_30: # %else56
; RV64ZVE32F-NEXT: slli a2, a1, 43
; RV64ZVE32F-NEXT: bgez a2, .LBB108_32
-; RV64ZVE32F-NEXT: .LBB108_31: # %cond.load58
+; RV64ZVE32F-NEXT: # %bb.31: # %cond.load58
; RV64ZVE32F-NEXT: vsetivli zero, 21, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -13949,7 +13587,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB108_59
-; RV64ZVE32F-NEXT: # %bb.35: # %else65
+; RV64ZVE32F-NEXT: .LBB108_35: # %else65
; RV64ZVE32F-NEXT: slli a2, a1, 40
; RV64ZVE32F-NEXT: bltz a2, .LBB108_60
; RV64ZVE32F-NEXT: .LBB108_36: # %else68
@@ -13958,7 +13596,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: .LBB108_37: # %else71
; RV64ZVE32F-NEXT: slli a2, a1, 38
; RV64ZVE32F-NEXT: bgez a2, .LBB108_39
-; RV64ZVE32F-NEXT: .LBB108_38: # %cond.load73
+; RV64ZVE32F-NEXT: # %bb.38: # %cond.load73
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -13974,7 +13612,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB108_62
-; RV64ZVE32F-NEXT: # %bb.40: # %else77
+; RV64ZVE32F-NEXT: .LBB108_40: # %else77
; RV64ZVE32F-NEXT: slli a2, a1, 36
; RV64ZVE32F-NEXT: bltz a2, .LBB108_63
; RV64ZVE32F-NEXT: .LBB108_41: # %else80
@@ -13983,7 +13621,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: .LBB108_42: # %else83
; RV64ZVE32F-NEXT: slli a2, a1, 34
; RV64ZVE32F-NEXT: bgez a2, .LBB108_44
-; RV64ZVE32F-NEXT: .LBB108_43: # %cond.load85
+; RV64ZVE32F-NEXT: # %bb.43: # %cond.load85
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -14029,8 +13667,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB108_6
+; RV64ZVE32F-NEXT: j .LBB108_5
; RV64ZVE32F-NEXT: .LBB108_50: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
@@ -14040,9 +13677,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB108_7
-; RV64ZVE32F-NEXT: j .LBB108_8
+; RV64ZVE32F-NEXT: j .LBB108_6
; RV64ZVE32F-NEXT: .LBB108_51: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -14050,8 +13685,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 6
-; RV64ZVE32F-NEXT: andi a2, a1, 128
-; RV64ZVE32F-NEXT: beqz a2, .LBB108_12
+; RV64ZVE32F-NEXT: j .LBB108_11
; RV64ZVE32F-NEXT: .LBB108_52: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
@@ -14061,8 +13695,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v13, a2
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 7
-; RV64ZVE32F-NEXT: andi a2, a1, 256
-; RV64ZVE32F-NEXT: beqz a2, .LBB108_13
+; RV64ZVE32F-NEXT: j .LBB108_12
; RV64ZVE32F-NEXT: .LBB108_53: # %cond.load22
; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
@@ -14070,9 +13703,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v13, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 8
-; RV64ZVE32F-NEXT: andi a2, a1, 512
-; RV64ZVE32F-NEXT: bnez a2, .LBB108_14
-; RV64ZVE32F-NEXT: j .LBB108_15
+; RV64ZVE32F-NEXT: j .LBB108_13
; RV64ZVE32F-NEXT: .LBB108_54: # %cond.load40
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -14080,8 +13711,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 14
-; RV64ZVE32F-NEXT: slli a2, a1, 48
-; RV64ZVE32F-NEXT: bgez a2, .LBB108_25
+; RV64ZVE32F-NEXT: j .LBB108_24
; RV64ZVE32F-NEXT: .LBB108_55: # %cond.load43
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -14091,8 +13721,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 15
-; RV64ZVE32F-NEXT: slli a2, a1, 47
-; RV64ZVE32F-NEXT: bgez a2, .LBB108_26
+; RV64ZVE32F-NEXT: j .LBB108_25
; RV64ZVE32F-NEXT: .LBB108_56: # %cond.load46
; RV64ZVE32F-NEXT: vsetivli zero, 17, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -14100,9 +13729,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 16
-; RV64ZVE32F-NEXT: slli a2, a1, 46
-; RV64ZVE32F-NEXT: bltz a2, .LBB108_27
-; RV64ZVE32F-NEXT: j .LBB108_28
+; RV64ZVE32F-NEXT: j .LBB108_26
; RV64ZVE32F-NEXT: .LBB108_57: # %cond.load52
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -14110,8 +13737,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 19, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 18
-; RV64ZVE32F-NEXT: slli a2, a1, 44
-; RV64ZVE32F-NEXT: bgez a2, .LBB108_30
+; RV64ZVE32F-NEXT: j .LBB108_29
; RV64ZVE32F-NEXT: .LBB108_58: # %cond.load55
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
@@ -14121,9 +13747,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 20, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 19
-; RV64ZVE32F-NEXT: slli a2, a1, 43
-; RV64ZVE32F-NEXT: bltz a2, .LBB108_31
-; RV64ZVE32F-NEXT: j .LBB108_32
+; RV64ZVE32F-NEXT: j .LBB108_30
; RV64ZVE32F-NEXT: .LBB108_59: # %cond.load64
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -14131,8 +13755,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 23, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 22
-; RV64ZVE32F-NEXT: slli a2, a1, 40
-; RV64ZVE32F-NEXT: bgez a2, .LBB108_36
+; RV64ZVE32F-NEXT: j .LBB108_35
; RV64ZVE32F-NEXT: .LBB108_60: # %cond.load67
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -14142,8 +13765,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 24, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 23
-; RV64ZVE32F-NEXT: slli a2, a1, 39
-; RV64ZVE32F-NEXT: bgez a2, .LBB108_37
+; RV64ZVE32F-NEXT: j .LBB108_36
; RV64ZVE32F-NEXT: .LBB108_61: # %cond.load70
; RV64ZVE32F-NEXT: vsetivli zero, 25, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -14151,9 +13773,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 24
-; RV64ZVE32F-NEXT: slli a2, a1, 38
-; RV64ZVE32F-NEXT: bltz a2, .LBB108_38
-; RV64ZVE32F-NEXT: j .LBB108_39
+; RV64ZVE32F-NEXT: j .LBB108_37
; RV64ZVE32F-NEXT: .LBB108_62: # %cond.load76
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -14161,8 +13781,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 27, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 26
-; RV64ZVE32F-NEXT: slli a2, a1, 36
-; RV64ZVE32F-NEXT: bgez a2, .LBB108_41
+; RV64ZVE32F-NEXT: j .LBB108_40
; RV64ZVE32F-NEXT: .LBB108_63: # %cond.load79
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -14172,8 +13791,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 28, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 27
-; RV64ZVE32F-NEXT: slli a2, a1, 35
-; RV64ZVE32F-NEXT: bgez a2, .LBB108_42
+; RV64ZVE32F-NEXT: j .LBB108_41
; RV64ZVE32F-NEXT: .LBB108_64: # %cond.load82
; RV64ZVE32F-NEXT: vsetivli zero, 29, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -14181,9 +13799,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 28
-; RV64ZVE32F-NEXT: slli a2, a1, 34
-; RV64ZVE32F-NEXT: bltz a2, .LBB108_43
-; RV64ZVE32F-NEXT: j .LBB108_44
+; RV64ZVE32F-NEXT: j .LBB108_42
%ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
%v = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %ptrs, i32 2, <32 x i1> %m, <32 x i8> %passthru)
ret <32 x i8> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 8f2672e8f40c1..fac8fe1b9a352 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -79,16 +79,15 @@ define void @mscatter_v2i8(<2 x i8> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB1_3
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB1_1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB1_4
-; RV64ZVE32F-NEXT: .LBB1_2: # %else2
+; RV64ZVE32F-NEXT: # %bb.2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB1_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB1_2
+; RV64ZVE32F-NEXT: j .LBB1_1
; RV64ZVE32F-NEXT: .LBB1_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -127,16 +126,15 @@ define void @mscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x ptr> %ptrs, <2
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; RV64ZVE32F-NEXT: bnez a3, .LBB2_3
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB2_1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB2_4
-; RV64ZVE32F-NEXT: .LBB2_2: # %else2
+; RV64ZVE32F-NEXT: # %bb.2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB2_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB2_2
+; RV64ZVE32F-NEXT: j .LBB2_1
; RV64ZVE32F-NEXT: .LBB2_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -184,16 +182,15 @@ define void @mscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x ptr> %ptrs, <2
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; RV64ZVE32F-NEXT: bnez a3, .LBB3_3
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB3_1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB3_4
-; RV64ZVE32F-NEXT: .LBB3_2: # %else2
+; RV64ZVE32F-NEXT: # %bb.2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB3_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB3_2
+; RV64ZVE32F-NEXT: j .LBB3_1
; RV64ZVE32F-NEXT: .LBB3_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -247,16 +244,15 @@ define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x ptr> %ptrs, <2
; RV64ZVE32F-NEXT: andi a1, a0, 1
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: bnez a1, .LBB4_3
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB4_1: # %else
; RV64ZVE32F-NEXT: andi a0, a0, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB4_4
-; RV64ZVE32F-NEXT: .LBB4_2: # %else2
+; RV64ZVE32F-NEXT: # %bb.2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB4_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a2)
-; RV64ZVE32F-NEXT: andi a0, a0, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB4_2
+; RV64ZVE32F-NEXT: j .LBB4_1
; RV64ZVE32F-NEXT: .LBB4_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -291,7 +287,7 @@ define void @mscatter_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB5_5
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB5_1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB5_6
; RV64ZVE32F-NEXT: .LBB5_2: # %else2
@@ -300,26 +296,23 @@ define void @mscatter_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB5_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB5_8
-; RV64ZVE32F-NEXT: .LBB5_4: # %else6
+; RV64ZVE32F-NEXT: # %bb.4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB5_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a0, a3, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB5_2
+; RV64ZVE32F-NEXT: j .LBB5_1
; RV64ZVE32F-NEXT: .LBB5_6: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse8.v v9, (a4)
-; RV64ZVE32F-NEXT: andi a0, a3, 4
-; RV64ZVE32F-NEXT: beqz a0, .LBB5_3
+; RV64ZVE32F-NEXT: j .LBB5_2
; RV64ZVE32F-NEXT: .LBB5_7: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
-; RV64ZVE32F-NEXT: andi a3, a3, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB5_4
+; RV64ZVE32F-NEXT: j .LBB5_3
; RV64ZVE32F-NEXT: .LBB5_8: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
@@ -397,7 +390,7 @@ define void @mscatter_v8i8(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB8_9
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB8_1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB8_10
; RV64ZVE32F-NEXT: .LBB8_2: # %else2
@@ -418,50 +411,43 @@ define void @mscatter_v8i8(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB8_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB8_16
-; RV64ZVE32F-NEXT: .LBB8_8: # %else14
+; RV64ZVE32F-NEXT: # %bb.8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB8_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB8_2
+; RV64ZVE32F-NEXT: j .LBB8_1
; RV64ZVE32F-NEXT: .LBB8_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse8.v v9, (t0)
-; RV64ZVE32F-NEXT: andi a0, a4, 4
-; RV64ZVE32F-NEXT: beqz a0, .LBB8_3
+; RV64ZVE32F-NEXT: j .LBB8_2
; RV64ZVE32F-NEXT: .LBB8_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse8.v v9, (a7)
-; RV64ZVE32F-NEXT: andi a0, a4, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB8_4
+; RV64ZVE32F-NEXT: j .LBB8_3
; RV64ZVE32F-NEXT: .LBB8_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse8.v v9, (a6)
-; RV64ZVE32F-NEXT: andi a0, a4, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB8_5
+; RV64ZVE32F-NEXT: j .LBB8_4
; RV64ZVE32F-NEXT: .LBB8_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse8.v v9, (a5)
-; RV64ZVE32F-NEXT: andi a0, a4, 32
-; RV64ZVE32F-NEXT: beqz a0, .LBB8_6
+; RV64ZVE32F-NEXT: j .LBB8_5
; RV64ZVE32F-NEXT: .LBB8_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vse8.v v9, (a3)
-; RV64ZVE32F-NEXT: andi a0, a4, 64
-; RV64ZVE32F-NEXT: beqz a0, .LBB8_7
+; RV64ZVE32F-NEXT: j .LBB8_6
; RV64ZVE32F-NEXT: .LBB8_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
-; RV64ZVE32F-NEXT: andi a0, a4, -128
-; RV64ZVE32F-NEXT: beqz a0, .LBB8_8
+; RV64ZVE32F-NEXT: j .LBB8_7
; RV64ZVE32F-NEXT: .LBB8_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -517,7 +503,7 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB9_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB9_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB9_13
; RV64ZVE32F-NEXT: .LBB9_6: # %else6
@@ -526,7 +512,7 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: .LBB9_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB9_9
-; RV64ZVE32F-NEXT: .LBB9_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -539,10 +525,10 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB9_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB9_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB9_16
-; RV64ZVE32F-NEXT: .LBB9_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB9_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -550,8 +536,7 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB9_6
+; RV64ZVE32F-NEXT: j .LBB9_5
; RV64ZVE32F-NEXT: .LBB9_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -560,25 +545,21 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB9_7
+; RV64ZVE32F-NEXT: j .LBB9_6
; RV64ZVE32F-NEXT: .LBB9_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB9_8
-; RV64ZVE32F-NEXT: j .LBB9_9
+; RV64ZVE32F-NEXT: j .LBB9_7
; RV64ZVE32F-NEXT: .LBB9_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB9_11
+; RV64ZVE32F-NEXT: j .LBB9_10
; RV64ZVE32F-NEXT: .LBB9_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -655,16 +636,15 @@ define void @mscatter_v2i16(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB11_3
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB11_1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB11_4
-; RV64ZVE32F-NEXT: .LBB11_2: # %else2
+; RV64ZVE32F-NEXT: # %bb.2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB11_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB11_2
+; RV64ZVE32F-NEXT: j .LBB11_1
; RV64ZVE32F-NEXT: .LBB11_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -704,16 +684,15 @@ define void @mscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x ptr> %ptrs, <2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; RV64ZVE32F-NEXT: bnez a3, .LBB12_3
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB12_1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB12_4
-; RV64ZVE32F-NEXT: .LBB12_2: # %else2
+; RV64ZVE32F-NEXT: # %bb.2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB12_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB12_2
+; RV64ZVE32F-NEXT: j .LBB12_1
; RV64ZVE32F-NEXT: .LBB12_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -765,16 +744,15 @@ define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: bnez a1, .LBB13_3
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB13_1: # %else
; RV64ZVE32F-NEXT: andi a0, a0, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB13_4
-; RV64ZVE32F-NEXT: .LBB13_2: # %else2
+; RV64ZVE32F-NEXT: # %bb.2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB13_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a2)
-; RV64ZVE32F-NEXT: andi a0, a0, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB13_2
+; RV64ZVE32F-NEXT: j .LBB13_1
; RV64ZVE32F-NEXT: .LBB13_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -809,7 +787,7 @@ define void @mscatter_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB14_5
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB14_1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB14_6
; RV64ZVE32F-NEXT: .LBB14_2: # %else2
@@ -818,26 +796,23 @@ define void @mscatter_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB14_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB14_8
-; RV64ZVE32F-NEXT: .LBB14_4: # %else6
+; RV64ZVE32F-NEXT: # %bb.4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB14_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a0, a3, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB14_2
+; RV64ZVE32F-NEXT: j .LBB14_1
; RV64ZVE32F-NEXT: .LBB14_6: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse16.v v9, (a4)
-; RV64ZVE32F-NEXT: andi a0, a3, 4
-; RV64ZVE32F-NEXT: beqz a0, .LBB14_3
+; RV64ZVE32F-NEXT: j .LBB14_2
; RV64ZVE32F-NEXT: .LBB14_7: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: andi a3, a3, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB14_4
+; RV64ZVE32F-NEXT: j .LBB14_3
; RV64ZVE32F-NEXT: .LBB14_8: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
@@ -915,7 +890,7 @@ define void @mscatter_v8i16(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB17_9
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB17_1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB17_10
; RV64ZVE32F-NEXT: .LBB17_2: # %else2
@@ -936,50 +911,43 @@ define void @mscatter_v8i16(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB17_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB17_16
-; RV64ZVE32F-NEXT: .LBB17_8: # %else14
+; RV64ZVE32F-NEXT: # %bb.8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB17_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB17_2
+; RV64ZVE32F-NEXT: j .LBB17_1
; RV64ZVE32F-NEXT: .LBB17_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse16.v v9, (t0)
-; RV64ZVE32F-NEXT: andi a0, a4, 4
-; RV64ZVE32F-NEXT: beqz a0, .LBB17_3
+; RV64ZVE32F-NEXT: j .LBB17_2
; RV64ZVE32F-NEXT: .LBB17_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse16.v v9, (a7)
-; RV64ZVE32F-NEXT: andi a0, a4, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB17_4
+; RV64ZVE32F-NEXT: j .LBB17_3
; RV64ZVE32F-NEXT: .LBB17_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse16.v v9, (a6)
-; RV64ZVE32F-NEXT: andi a0, a4, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB17_5
+; RV64ZVE32F-NEXT: j .LBB17_4
; RV64ZVE32F-NEXT: .LBB17_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a5)
-; RV64ZVE32F-NEXT: andi a0, a4, 32
-; RV64ZVE32F-NEXT: beqz a0, .LBB17_6
+; RV64ZVE32F-NEXT: j .LBB17_5
; RV64ZVE32F-NEXT: .LBB17_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vse16.v v9, (a3)
-; RV64ZVE32F-NEXT: andi a0, a4, 64
-; RV64ZVE32F-NEXT: beqz a0, .LBB17_7
+; RV64ZVE32F-NEXT: j .LBB17_6
; RV64ZVE32F-NEXT: .LBB17_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: andi a0, a4, -128
-; RV64ZVE32F-NEXT: beqz a0, .LBB17_8
+; RV64ZVE32F-NEXT: j .LBB17_7
; RV64ZVE32F-NEXT: .LBB17_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -1039,7 +1007,7 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB18_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB18_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB18_13
; RV64ZVE32F-NEXT: .LBB18_6: # %else6
@@ -1048,7 +1016,7 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: .LBB18_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB18_9
-; RV64ZVE32F-NEXT: .LBB18_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -1062,10 +1030,10 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB18_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB18_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB18_16
-; RV64ZVE32F-NEXT: .LBB18_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB18_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -1074,8 +1042,7 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: vse16.v v11, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB18_6
+; RV64ZVE32F-NEXT: j .LBB18_5
; RV64ZVE32F-NEXT: .LBB18_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -1085,8 +1052,7 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB18_7
+; RV64ZVE32F-NEXT: j .LBB18_6
; RV64ZVE32F-NEXT: .LBB18_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -1095,9 +1061,7 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB18_8
-; RV64ZVE32F-NEXT: j .LBB18_9
+; RV64ZVE32F-NEXT: j .LBB18_7
; RV64ZVE32F-NEXT: .LBB18_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
@@ -1105,8 +1069,7 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB18_11
+; RV64ZVE32F-NEXT: j .LBB18_10
; RV64ZVE32F-NEXT: .LBB18_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -1172,7 +1135,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB19_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB19_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB19_13
; RV64ZVE32F-NEXT: .LBB19_6: # %else6
@@ -1181,7 +1144,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: .LBB19_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB19_9
-; RV64ZVE32F-NEXT: .LBB19_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -1195,10 +1158,10 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB19_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB19_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB19_16
-; RV64ZVE32F-NEXT: .LBB19_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB19_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -1207,8 +1170,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: vse16.v v11, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB19_6
+; RV64ZVE32F-NEXT: j .LBB19_5
; RV64ZVE32F-NEXT: .LBB19_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -1218,8 +1180,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB19_7
+; RV64ZVE32F-NEXT: j .LBB19_6
; RV64ZVE32F-NEXT: .LBB19_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -1228,9 +1189,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB19_8
-; RV64ZVE32F-NEXT: j .LBB19_9
+; RV64ZVE32F-NEXT: j .LBB19_7
; RV64ZVE32F-NEXT: .LBB19_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
@@ -1238,8 +1197,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB19_11
+; RV64ZVE32F-NEXT: j .LBB19_10
; RV64ZVE32F-NEXT: .LBB19_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -1306,7 +1264,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB20_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB20_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB20_13
; RV64ZVE32F-NEXT: .LBB20_6: # %else6
@@ -1315,7 +1273,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: .LBB20_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB20_9
-; RV64ZVE32F-NEXT: .LBB20_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -1330,10 +1288,10 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB20_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB20_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB20_16
-; RV64ZVE32F-NEXT: .LBB20_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB20_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -1343,8 +1301,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: vse16.v v11, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB20_6
+; RV64ZVE32F-NEXT: j .LBB20_5
; RV64ZVE32F-NEXT: .LBB20_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -1355,8 +1312,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB20_7
+; RV64ZVE32F-NEXT: j .LBB20_6
; RV64ZVE32F-NEXT: .LBB20_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -1366,9 +1322,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB20_8
-; RV64ZVE32F-NEXT: j .LBB20_9
+; RV64ZVE32F-NEXT: j .LBB20_7
; RV64ZVE32F-NEXT: .LBB20_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
@@ -1377,8 +1331,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB20_11
+; RV64ZVE32F-NEXT: j .LBB20_10
; RV64ZVE32F-NEXT: .LBB20_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -1444,7 +1397,7 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB21_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB21_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB21_13
; RV64ZVE32F-NEXT: .LBB21_6: # %else6
@@ -1453,7 +1406,7 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: .LBB21_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB21_9
-; RV64ZVE32F-NEXT: .LBB21_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -1467,10 +1420,10 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB21_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB21_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB21_16
-; RV64ZVE32F-NEXT: .LBB21_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB21_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -1479,8 +1432,7 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: vse16.v v11, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB21_6
+; RV64ZVE32F-NEXT: j .LBB21_5
; RV64ZVE32F-NEXT: .LBB21_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -1490,8 +1442,7 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB21_7
+; RV64ZVE32F-NEXT: j .LBB21_6
; RV64ZVE32F-NEXT: .LBB21_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -1499,9 +1450,7 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB21_8
-; RV64ZVE32F-NEXT: j .LBB21_9
+; RV64ZVE32F-NEXT: j .LBB21_7
; RV64ZVE32F-NEXT: .LBB21_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
@@ -1509,8 +1458,7 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB21_11
+; RV64ZVE32F-NEXT: j .LBB21_10
; RV64ZVE32F-NEXT: .LBB21_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -1588,16 +1536,15 @@ define void @mscatter_v2i32(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB23_3
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB23_1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB23_4
-; RV64ZVE32F-NEXT: .LBB23_2: # %else2
+; RV64ZVE32F-NEXT: # %bb.2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB23_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB23_2
+; RV64ZVE32F-NEXT: j .LBB23_1
; RV64ZVE32F-NEXT: .LBB23_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -1642,16 +1589,15 @@ define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x ptr> %ptrs, <2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32F-NEXT: bnez a4, .LBB24_3
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB24_1: # %else
; RV64ZVE32F-NEXT: andi a0, a0, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB24_4
-; RV64ZVE32F-NEXT: .LBB24_2: # %else2
+; RV64ZVE32F-NEXT: # %bb.2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB24_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
-; RV64ZVE32F-NEXT: andi a0, a0, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB24_2
+; RV64ZVE32F-NEXT: j .LBB24_1
; RV64ZVE32F-NEXT: .LBB24_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -1686,7 +1632,7 @@ define void @mscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB25_5
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB25_1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB25_6
; RV64ZVE32F-NEXT: .LBB25_2: # %else2
@@ -1695,26 +1641,23 @@ define void @mscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB25_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB25_8
-; RV64ZVE32F-NEXT: .LBB25_4: # %else6
+; RV64ZVE32F-NEXT: # %bb.4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB25_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a0, a3, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB25_2
+; RV64ZVE32F-NEXT: j .LBB25_1
; RV64ZVE32F-NEXT: .LBB25_6: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse32.v v9, (a4)
-; RV64ZVE32F-NEXT: andi a0, a3, 4
-; RV64ZVE32F-NEXT: beqz a0, .LBB25_3
+; RV64ZVE32F-NEXT: j .LBB25_2
; RV64ZVE32F-NEXT: .LBB25_7: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse32.v v9, (a2)
-; RV64ZVE32F-NEXT: andi a3, a3, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB25_4
+; RV64ZVE32F-NEXT: j .LBB25_3
; RV64ZVE32F-NEXT: .LBB25_8: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
@@ -1792,7 +1735,7 @@ define void @mscatter_v8i32(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB28_9
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB28_1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB28_10
; RV64ZVE32F-NEXT: .LBB28_2: # %else2
@@ -1813,53 +1756,46 @@ define void @mscatter_v8i32(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB28_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB28_16
-; RV64ZVE32F-NEXT: .LBB28_8: # %else14
+; RV64ZVE32F-NEXT: # %bb.8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB28_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB28_2
+; RV64ZVE32F-NEXT: j .LBB28_1
; RV64ZVE32F-NEXT: .LBB28_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vse32.v v10, (t0)
-; RV64ZVE32F-NEXT: andi a0, a4, 4
-; RV64ZVE32F-NEXT: beqz a0, .LBB28_3
+; RV64ZVE32F-NEXT: j .LBB28_2
; RV64ZVE32F-NEXT: .LBB28_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV64ZVE32F-NEXT: vse32.v v10, (a7)
-; RV64ZVE32F-NEXT: andi a0, a4, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB28_4
+; RV64ZVE32F-NEXT: j .LBB28_3
; RV64ZVE32F-NEXT: .LBB28_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a6)
-; RV64ZVE32F-NEXT: andi a0, a4, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB28_5
+; RV64ZVE32F-NEXT: j .LBB28_4
; RV64ZVE32F-NEXT: .LBB28_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a5)
-; RV64ZVE32F-NEXT: andi a0, a4, 32
-; RV64ZVE32F-NEXT: beqz a0, .LBB28_6
+; RV64ZVE32F-NEXT: j .LBB28_5
; RV64ZVE32F-NEXT: .LBB28_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a3)
-; RV64ZVE32F-NEXT: andi a0, a4, 64
-; RV64ZVE32F-NEXT: beqz a0, .LBB28_7
+; RV64ZVE32F-NEXT: j .LBB28_6
; RV64ZVE32F-NEXT: .LBB28_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a0, a4, -128
-; RV64ZVE32F-NEXT: beqz a0, .LBB28_8
+; RV64ZVE32F-NEXT: j .LBB28_7
; RV64ZVE32F-NEXT: .LBB28_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -1919,7 +1855,7 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB29_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB29_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB29_13
; RV64ZVE32F-NEXT: .LBB29_6: # %else6
@@ -1928,7 +1864,7 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: .LBB29_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB29_9
-; RV64ZVE32F-NEXT: .LBB29_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -1943,10 +1879,10 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB29_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB29_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB29_16
-; RV64ZVE32F-NEXT: .LBB29_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB29_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -1956,8 +1892,7 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB29_6
+; RV64ZVE32F-NEXT: j .LBB29_5
; RV64ZVE32F-NEXT: .LBB29_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -1967,8 +1902,7 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB29_7
+; RV64ZVE32F-NEXT: j .LBB29_6
; RV64ZVE32F-NEXT: .LBB29_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -1978,9 +1912,7 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB29_8
-; RV64ZVE32F-NEXT: j .LBB29_9
+; RV64ZVE32F-NEXT: j .LBB29_7
; RV64ZVE32F-NEXT: .LBB29_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -1989,8 +1921,7 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB29_11
+; RV64ZVE32F-NEXT: j .LBB29_10
; RV64ZVE32F-NEXT: .LBB29_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2056,7 +1987,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB30_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB30_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB30_13
; RV64ZVE32F-NEXT: .LBB30_6: # %else6
@@ -2065,7 +1996,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: .LBB30_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB30_9
-; RV64ZVE32F-NEXT: .LBB30_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -2080,10 +2011,10 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB30_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB30_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB30_16
-; RV64ZVE32F-NEXT: .LBB30_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB30_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -2093,8 +2024,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB30_6
+; RV64ZVE32F-NEXT: j .LBB30_5
; RV64ZVE32F-NEXT: .LBB30_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2104,8 +2034,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB30_7
+; RV64ZVE32F-NEXT: j .LBB30_6
; RV64ZVE32F-NEXT: .LBB30_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -2115,9 +2044,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB30_8
-; RV64ZVE32F-NEXT: j .LBB30_9
+; RV64ZVE32F-NEXT: j .LBB30_7
; RV64ZVE32F-NEXT: .LBB30_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -2126,8 +2053,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB30_11
+; RV64ZVE32F-NEXT: j .LBB30_10
; RV64ZVE32F-NEXT: .LBB30_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2197,7 +2123,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB31_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB31_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB31_13
; RV64ZVE32F-NEXT: .LBB31_6: # %else6
@@ -2206,7 +2132,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: .LBB31_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB31_9
-; RV64ZVE32F-NEXT: .LBB31_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -2222,10 +2148,10 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB31_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB31_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB31_16
-; RV64ZVE32F-NEXT: .LBB31_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB31_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -2236,8 +2162,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB31_6
+; RV64ZVE32F-NEXT: j .LBB31_5
; RV64ZVE32F-NEXT: .LBB31_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2248,8 +2173,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB31_7
+; RV64ZVE32F-NEXT: j .LBB31_6
; RV64ZVE32F-NEXT: .LBB31_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -2260,9 +2184,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB31_8
-; RV64ZVE32F-NEXT: j .LBB31_9
+; RV64ZVE32F-NEXT: j .LBB31_7
; RV64ZVE32F-NEXT: .LBB31_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
@@ -2272,8 +2194,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB31_11
+; RV64ZVE32F-NEXT: j .LBB31_10
; RV64ZVE32F-NEXT: .LBB31_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2343,7 +2264,7 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB32_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB32_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB32_13
; RV64ZVE32F-NEXT: .LBB32_6: # %else6
@@ -2352,7 +2273,7 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: .LBB32_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB32_9
-; RV64ZVE32F-NEXT: .LBB32_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -2367,10 +2288,10 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB32_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB32_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB32_16
-; RV64ZVE32F-NEXT: .LBB32_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB32_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -2380,8 +2301,7 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB32_6
+; RV64ZVE32F-NEXT: j .LBB32_5
; RV64ZVE32F-NEXT: .LBB32_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2391,8 +2311,7 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB32_7
+; RV64ZVE32F-NEXT: j .LBB32_6
; RV64ZVE32F-NEXT: .LBB32_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -2402,9 +2321,7 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB32_8
-; RV64ZVE32F-NEXT: j .LBB32_9
+; RV64ZVE32F-NEXT: j .LBB32_7
; RV64ZVE32F-NEXT: .LBB32_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -2413,8 +2330,7 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB32_11
+; RV64ZVE32F-NEXT: j .LBB32_10
; RV64ZVE32F-NEXT: .LBB32_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2482,7 +2398,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB33_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB33_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB33_13
; RV64ZVE32F-NEXT: .LBB33_6: # %else6
@@ -2491,7 +2407,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: .LBB33_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB33_9
-; RV64ZVE32F-NEXT: .LBB33_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -2506,10 +2422,10 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB33_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB33_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB33_16
-; RV64ZVE32F-NEXT: .LBB33_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB33_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -2519,8 +2435,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB33_6
+; RV64ZVE32F-NEXT: j .LBB33_5
; RV64ZVE32F-NEXT: .LBB33_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2530,8 +2445,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB33_7
+; RV64ZVE32F-NEXT: j .LBB33_6
; RV64ZVE32F-NEXT: .LBB33_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -2541,9 +2455,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB33_8
-; RV64ZVE32F-NEXT: j .LBB33_9
+; RV64ZVE32F-NEXT: j .LBB33_7
; RV64ZVE32F-NEXT: .LBB33_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -2552,8 +2464,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB33_11
+; RV64ZVE32F-NEXT: j .LBB33_10
; RV64ZVE32F-NEXT: .LBB33_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2624,7 +2535,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB34_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB34_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB34_13
; RV64ZVE32F-NEXT: .LBB34_6: # %else6
@@ -2633,7 +2544,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: .LBB34_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB34_9
-; RV64ZVE32F-NEXT: .LBB34_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -2649,10 +2560,10 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB34_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB34_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB34_16
-; RV64ZVE32F-NEXT: .LBB34_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB34_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -2663,8 +2574,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB34_6
+; RV64ZVE32F-NEXT: j .LBB34_5
; RV64ZVE32F-NEXT: .LBB34_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2675,8 +2585,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB34_7
+; RV64ZVE32F-NEXT: j .LBB34_6
; RV64ZVE32F-NEXT: .LBB34_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -2687,9 +2596,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB34_8
-; RV64ZVE32F-NEXT: j .LBB34_9
+; RV64ZVE32F-NEXT: j .LBB34_7
; RV64ZVE32F-NEXT: .LBB34_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 48
@@ -2699,8 +2606,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB34_11
+; RV64ZVE32F-NEXT: j .LBB34_10
; RV64ZVE32F-NEXT: .LBB34_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2765,7 +2671,7 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB35_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB35_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB35_13
; RV64ZVE32F-NEXT: .LBB35_6: # %else6
@@ -2774,7 +2680,7 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs,
; RV64ZVE32F-NEXT: .LBB35_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB35_9
-; RV64ZVE32F-NEXT: .LBB35_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -2789,10 +2695,10 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB35_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB35_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB35_16
-; RV64ZVE32F-NEXT: .LBB35_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB35_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -2801,8 +2707,7 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB35_6
+; RV64ZVE32F-NEXT: j .LBB35_5
; RV64ZVE32F-NEXT: .LBB35_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2811,8 +2716,7 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs,
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB35_7
+; RV64ZVE32F-NEXT: j .LBB35_6
; RV64ZVE32F-NEXT: .LBB35_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
@@ -2821,9 +2725,7 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB35_8
-; RV64ZVE32F-NEXT: j .LBB35_9
+; RV64ZVE32F-NEXT: j .LBB35_7
; RV64ZVE32F-NEXT: .LBB35_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -2832,8 +2734,7 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB35_11
+; RV64ZVE32F-NEXT: j .LBB35_10
; RV64ZVE32F-NEXT: .LBB35_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2914,10 +2815,10 @@ define void @mscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s a3, v0
; RV32ZVE32F-NEXT: andi a4, a3, 1
; RV32ZVE32F-NEXT: bnez a4, .LBB37_3
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB37_1: # %else
; RV32ZVE32F-NEXT: andi a3, a3, 2
; RV32ZVE32F-NEXT: bnez a3, .LBB37_4
-; RV32ZVE32F-NEXT: .LBB37_2: # %else2
+; RV32ZVE32F-NEXT: # %bb.2: # %else2
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB37_3: # %cond.store
; RV32ZVE32F-NEXT: lw a4, 0(a0)
@@ -2926,8 +2827,7 @@ define void @mscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s a5, v8
; RV32ZVE32F-NEXT: sw a4, 0(a5)
; RV32ZVE32F-NEXT: sw a0, 4(a5)
-; RV32ZVE32F-NEXT: andi a3, a3, 2
-; RV32ZVE32F-NEXT: beqz a3, .LBB37_2
+; RV32ZVE32F-NEXT: j .LBB37_1
; RV32ZVE32F-NEXT: .LBB37_4: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -2942,15 +2842,14 @@ define void @mscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi a5, a4, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB37_3
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB37_1: # %else
; RV64ZVE32F-NEXT: andi a4, a4, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB37_4
-; RV64ZVE32F-NEXT: .LBB37_2: # %else2
+; RV64ZVE32F-NEXT: # %bb.2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB37_3: # %cond.store
; RV64ZVE32F-NEXT: sd a0, 0(a2)
-; RV64ZVE32F-NEXT: andi a4, a4, 2
-; RV64ZVE32F-NEXT: beqz a4, .LBB37_2
+; RV64ZVE32F-NEXT: j .LBB37_1
; RV64ZVE32F-NEXT: .LBB37_4: # %cond.store1
; RV64ZVE32F-NEXT: sd a1, 0(a3)
; RV64ZVE32F-NEXT: ret
@@ -2985,7 +2884,7 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s a5, v0
; RV32ZVE32F-NEXT: andi t0, a5, 1
; RV32ZVE32F-NEXT: bnez t0, .LBB38_5
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB38_1: # %else
; RV32ZVE32F-NEXT: andi a0, a5, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB38_6
; RV32ZVE32F-NEXT: .LBB38_2: # %else2
@@ -2994,7 +2893,7 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32ZVE32F-NEXT: .LBB38_3: # %else4
; RV32ZVE32F-NEXT: andi a5, a5, 8
; RV32ZVE32F-NEXT: bnez a5, .LBB38_8
-; RV32ZVE32F-NEXT: .LBB38_4: # %else6
+; RV32ZVE32F-NEXT: # %bb.4: # %else6
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB38_5: # %cond.store
; RV32ZVE32F-NEXT: lw t0, 0(a0)
@@ -3003,24 +2902,21 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s t1, v8
; RV32ZVE32F-NEXT: sw t0, 0(t1)
; RV32ZVE32F-NEXT: sw a0, 4(t1)
-; RV32ZVE32F-NEXT: andi a0, a5, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB38_2
+; RV32ZVE32F-NEXT: j .LBB38_1
; RV32ZVE32F-NEXT: .LBB38_6: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v9
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a5, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB38_3
+; RV32ZVE32F-NEXT: j .LBB38_2
; RV32ZVE32F-NEXT: .LBB38_7: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v9
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a4, 4(a0)
-; RV32ZVE32F-NEXT: andi a5, a5, 8
-; RV32ZVE32F-NEXT: beqz a5, .LBB38_4
+; RV32ZVE32F-NEXT: j .LBB38_3
; RV32ZVE32F-NEXT: .LBB38_8: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
@@ -3041,7 +2937,7 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a7, v0
; RV64ZVE32F-NEXT: andi t1, a7, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB38_5
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB38_1: # %else
; RV64ZVE32F-NEXT: andi a0, a7, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB38_6
; RV64ZVE32F-NEXT: .LBB38_2: # %else2
@@ -3050,22 +2946,19 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB38_3: # %else4
; RV64ZVE32F-NEXT: andi a0, a7, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB38_8
-; RV64ZVE32F-NEXT: .LBB38_4: # %else6
+; RV64ZVE32F-NEXT: # %bb.4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB38_5: # %cond.store
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: sd a0, 0(a1)
-; RV64ZVE32F-NEXT: andi a0, a7, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB38_2
+; RV64ZVE32F-NEXT: j .LBB38_1
; RV64ZVE32F-NEXT: .LBB38_6: # %cond.store1
; RV64ZVE32F-NEXT: sd t0, 0(a6)
-; RV64ZVE32F-NEXT: andi a0, a7, 4
-; RV64ZVE32F-NEXT: beqz a0, .LBB38_3
+; RV64ZVE32F-NEXT: j .LBB38_2
; RV64ZVE32F-NEXT: .LBB38_7: # %cond.store3
; RV64ZVE32F-NEXT: sd a5, 0(a4)
-; RV64ZVE32F-NEXT: andi a0, a7, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB38_4
+; RV64ZVE32F-NEXT: j .LBB38_3
; RV64ZVE32F-NEXT: .LBB38_8: # %cond.store5
; RV64ZVE32F-NEXT: sd a3, 0(a2)
; RV64ZVE32F-NEXT: ret
@@ -3185,7 +3078,7 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s a7, v0
; RV32ZVE32F-NEXT: andi s1, a7, 1
; RV32ZVE32F-NEXT: bnez s1, .LBB41_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB41_1: # %else
; RV32ZVE32F-NEXT: andi a0, a7, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB41_11
; RV32ZVE32F-NEXT: .LBB41_2: # %else2
@@ -3206,7 +3099,7 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32ZVE32F-NEXT: .LBB41_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a7, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB41_9
-; RV32ZVE32F-NEXT: .LBB41_8: # %cond.store13
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -3230,57 +3123,49 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw s1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: andi a0, a7, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB41_2
+; RV32ZVE32F-NEXT: j .LBB41_1
; RV32ZVE32F-NEXT: .LBB41_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t6, 0(a0)
; RV32ZVE32F-NEXT: sw s0, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB41_3
+; RV32ZVE32F-NEXT: j .LBB41_2
; RV32ZVE32F-NEXT: .LBB41_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t4, 0(a0)
; RV32ZVE32F-NEXT: sw t5, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB41_4
+; RV32ZVE32F-NEXT: j .LBB41_3
; RV32ZVE32F-NEXT: .LBB41_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t2, 0(a0)
; RV32ZVE32F-NEXT: sw t3, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB41_5
+; RV32ZVE32F-NEXT: j .LBB41_4
; RV32ZVE32F-NEXT: .LBB41_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t0, 0(a0)
; RV32ZVE32F-NEXT: sw t1, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB41_6
+; RV32ZVE32F-NEXT: j .LBB41_5
; RV32ZVE32F-NEXT: .LBB41_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a5, 0(a0)
; RV32ZVE32F-NEXT: sw a6, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB41_7
+; RV32ZVE32F-NEXT: j .LBB41_6
; RV32ZVE32F-NEXT: .LBB41_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a4, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB41_8
-; RV32ZVE32F-NEXT: j .LBB41_9
+; RV32ZVE32F-NEXT: j .LBB41_7
;
; RV64ZVE32F-LABEL: mscatter_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -3311,7 +3196,7 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s t1, v0
; RV64ZVE32F-NEXT: andi s2, t1, 1
; RV64ZVE32F-NEXT: bnez s2, .LBB41_10
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB41_1: # %else
; RV64ZVE32F-NEXT: andi a0, t1, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB41_11
; RV64ZVE32F-NEXT: .LBB41_2: # %else2
@@ -3332,7 +3217,7 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB41_7: # %else12
; RV64ZVE32F-NEXT: andi a0, t1, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB41_9
-; RV64ZVE32F-NEXT: .LBB41_8: # %cond.store13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store13
; RV64ZVE32F-NEXT: sd a5, 0(a2)
; RV64ZVE32F-NEXT: .LBB41_9: # %else14
; RV64ZVE32F-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
@@ -3349,33 +3234,25 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: sd a0, 0(a1)
-; RV64ZVE32F-NEXT: andi a0, t1, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB41_2
+; RV64ZVE32F-NEXT: j .LBB41_1
; RV64ZVE32F-NEXT: .LBB41_11: # %cond.store1
; RV64ZVE32F-NEXT: sd s1, 0(t5)
-; RV64ZVE32F-NEXT: andi a0, t1, 4
-; RV64ZVE32F-NEXT: beqz a0, .LBB41_3
+; RV64ZVE32F-NEXT: j .LBB41_2
; RV64ZVE32F-NEXT: .LBB41_12: # %cond.store3
; RV64ZVE32F-NEXT: sd s0, 0(t3)
-; RV64ZVE32F-NEXT: andi a0, t1, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB41_4
+; RV64ZVE32F-NEXT: j .LBB41_3
; RV64ZVE32F-NEXT: .LBB41_13: # %cond.store5
; RV64ZVE32F-NEXT: sd t6, 0(t2)
-; RV64ZVE32F-NEXT: andi a0, t1, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB41_5
+; RV64ZVE32F-NEXT: j .LBB41_4
; RV64ZVE32F-NEXT: .LBB41_14: # %cond.store7
; RV64ZVE32F-NEXT: sd t4, 0(t0)
-; RV64ZVE32F-NEXT: andi a0, t1, 32
-; RV64ZVE32F-NEXT: beqz a0, .LBB41_6
+; RV64ZVE32F-NEXT: j .LBB41_5
; RV64ZVE32F-NEXT: .LBB41_15: # %cond.store9
; RV64ZVE32F-NEXT: sd a7, 0(a4)
-; RV64ZVE32F-NEXT: andi a0, t1, 64
-; RV64ZVE32F-NEXT: beqz a0, .LBB41_7
+; RV64ZVE32F-NEXT: j .LBB41_6
; RV64ZVE32F-NEXT: .LBB41_16: # %cond.store11
; RV64ZVE32F-NEXT: sd a6, 0(a3)
-; RV64ZVE32F-NEXT: andi a0, t1, -128
-; RV64ZVE32F-NEXT: bnez a0, .LBB41_8
-; RV64ZVE32F-NEXT: j .LBB41_9
+; RV64ZVE32F-NEXT: j .LBB41_7
call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
}
@@ -3432,7 +3309,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB42_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB42_1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB42_11
; RV32ZVE32F-NEXT: .LBB42_2: # %else2
@@ -3453,7 +3330,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV32ZVE32F-NEXT: .LBB42_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB42_9
-; RV32ZVE32F-NEXT: .LBB42_8: # %cond.store13
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -3476,57 +3353,49 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: andi a0, t0, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB42_2
+; RV32ZVE32F-NEXT: j .LBB42_1
; RV32ZVE32F-NEXT: .LBB42_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB42_3
+; RV32ZVE32F-NEXT: j .LBB42_2
; RV32ZVE32F-NEXT: .LBB42_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB42_4
+; RV32ZVE32F-NEXT: j .LBB42_3
; RV32ZVE32F-NEXT: .LBB42_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB42_5
+; RV32ZVE32F-NEXT: j .LBB42_4
; RV32ZVE32F-NEXT: .LBB42_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB42_6
+; RV32ZVE32F-NEXT: j .LBB42_5
; RV32ZVE32F-NEXT: .LBB42_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB42_7
+; RV32ZVE32F-NEXT: j .LBB42_6
; RV32ZVE32F-NEXT: .LBB42_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB42_8
-; RV32ZVE32F-NEXT: j .LBB42_9
+; RV32ZVE32F-NEXT: j .LBB42_7
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -3564,7 +3433,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB42_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB42_5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB42_13
; RV64ZVE32F-NEXT: .LBB42_6: # %else6
@@ -3573,7 +3442,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: .LBB42_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB42_9
-; RV64ZVE32F-NEXT: .LBB42_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
@@ -3583,41 +3452,36 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB42_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB42_10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB42_16
-; RV64ZVE32F-NEXT: .LBB42_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB42_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB42_6
+; RV64ZVE32F-NEXT: j .LBB42_5
; RV64ZVE32F-NEXT: .LBB42_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB42_7
+; RV64ZVE32F-NEXT: j .LBB42_6
; RV64ZVE32F-NEXT: .LBB42_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 32
-; RV64ZVE32F-NEXT: bnez a0, .LBB42_8
-; RV64ZVE32F-NEXT: j .LBB42_9
+; RV64ZVE32F-NEXT: j .LBB42_7
; RV64ZVE32F-NEXT: .LBB42_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, -128
-; RV64ZVE32F-NEXT: beqz a0, .LBB42_11
+; RV64ZVE32F-NEXT: j .LBB42_10
; RV64ZVE32F-NEXT: .LBB42_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -3682,7 +3546,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB43_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB43_1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB43_11
; RV32ZVE32F-NEXT: .LBB43_2: # %else2
@@ -3703,7 +3567,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: .LBB43_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB43_9
-; RV32ZVE32F-NEXT: .LBB43_8: # %cond.store13
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -3726,57 +3590,49 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: andi a0, t0, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB43_2
+; RV32ZVE32F-NEXT: j .LBB43_1
; RV32ZVE32F-NEXT: .LBB43_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB43_3
+; RV32ZVE32F-NEXT: j .LBB43_2
; RV32ZVE32F-NEXT: .LBB43_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB43_4
+; RV32ZVE32F-NEXT: j .LBB43_3
; RV32ZVE32F-NEXT: .LBB43_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB43_5
+; RV32ZVE32F-NEXT: j .LBB43_4
; RV32ZVE32F-NEXT: .LBB43_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB43_6
+; RV32ZVE32F-NEXT: j .LBB43_5
; RV32ZVE32F-NEXT: .LBB43_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB43_7
+; RV32ZVE32F-NEXT: j .LBB43_6
; RV32ZVE32F-NEXT: .LBB43_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB43_8
-; RV32ZVE32F-NEXT: j .LBB43_9
+; RV32ZVE32F-NEXT: j .LBB43_7
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -3814,7 +3670,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB43_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB43_5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB43_13
; RV64ZVE32F-NEXT: .LBB43_6: # %else6
@@ -3823,7 +3679,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: .LBB43_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB43_9
-; RV64ZVE32F-NEXT: .LBB43_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
@@ -3833,41 +3689,36 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB43_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB43_10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB43_16
-; RV64ZVE32F-NEXT: .LBB43_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB43_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB43_6
+; RV64ZVE32F-NEXT: j .LBB43_5
; RV64ZVE32F-NEXT: .LBB43_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB43_7
+; RV64ZVE32F-NEXT: j .LBB43_6
; RV64ZVE32F-NEXT: .LBB43_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 32
-; RV64ZVE32F-NEXT: bnez a0, .LBB43_8
-; RV64ZVE32F-NEXT: j .LBB43_9
+; RV64ZVE32F-NEXT: j .LBB43_7
; RV64ZVE32F-NEXT: .LBB43_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, -128
-; RV64ZVE32F-NEXT: beqz a0, .LBB43_11
+; RV64ZVE32F-NEXT: j .LBB43_10
; RV64ZVE32F-NEXT: .LBB43_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -3934,7 +3785,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB44_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB44_1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB44_11
; RV32ZVE32F-NEXT: .LBB44_2: # %else2
@@ -3955,7 +3806,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: .LBB44_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB44_9
-; RV32ZVE32F-NEXT: .LBB44_8: # %cond.store13
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -3978,57 +3829,49 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: andi a0, t0, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB44_2
+; RV32ZVE32F-NEXT: j .LBB44_1
; RV32ZVE32F-NEXT: .LBB44_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB44_3
+; RV32ZVE32F-NEXT: j .LBB44_2
; RV32ZVE32F-NEXT: .LBB44_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB44_4
+; RV32ZVE32F-NEXT: j .LBB44_3
; RV32ZVE32F-NEXT: .LBB44_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB44_5
+; RV32ZVE32F-NEXT: j .LBB44_4
; RV32ZVE32F-NEXT: .LBB44_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB44_6
+; RV32ZVE32F-NEXT: j .LBB44_5
; RV32ZVE32F-NEXT: .LBB44_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB44_7
+; RV32ZVE32F-NEXT: j .LBB44_6
; RV32ZVE32F-NEXT: .LBB44_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB44_8
-; RV32ZVE32F-NEXT: j .LBB44_9
+; RV32ZVE32F-NEXT: j .LBB44_7
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -4068,7 +3911,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB44_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB44_5: # %else4
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB44_13
; RV64ZVE32F-NEXT: .LBB44_6: # %else6
@@ -4077,7 +3920,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: .LBB44_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB44_9
-; RV64ZVE32F-NEXT: .LBB44_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: andi a0, a0, 255
@@ -4088,10 +3931,10 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB44_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB44_10: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB44_16
-; RV64ZVE32F-NEXT: .LBB44_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB44_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4099,8 +3942,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB44_6
+; RV64ZVE32F-NEXT: j .LBB44_5
; RV64ZVE32F-NEXT: .LBB44_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4108,25 +3950,21 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB44_7
+; RV64ZVE32F-NEXT: j .LBB44_6
; RV64ZVE32F-NEXT: .LBB44_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: andi a0, a0, 255
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 32
-; RV64ZVE32F-NEXT: bnez a0, .LBB44_8
-; RV64ZVE32F-NEXT: j .LBB44_9
+; RV64ZVE32F-NEXT: j .LBB44_7
; RV64ZVE32F-NEXT: .LBB44_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: andi a0, a0, 255
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, -128
-; RV64ZVE32F-NEXT: beqz a0, .LBB44_11
+; RV64ZVE32F-NEXT: j .LBB44_10
; RV64ZVE32F-NEXT: .LBB44_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4193,7 +4031,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccus.vx v10, s1, v8
; RV32ZVE32F-NEXT: bnez s2, .LBB45_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB45_1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB45_11
; RV32ZVE32F-NEXT: .LBB45_2: # %else2
@@ -4214,7 +4052,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV32ZVE32F-NEXT: .LBB45_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB45_9
-; RV32ZVE32F-NEXT: .LBB45_8: # %cond.store13
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4238,57 +4076,49 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV32ZVE32F-NEXT: vmv.x.s s2, v10
; RV32ZVE32F-NEXT: sw s1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB45_2
+; RV32ZVE32F-NEXT: j .LBB45_1
; RV32ZVE32F-NEXT: .LBB45_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t6, 0(a0)
; RV32ZVE32F-NEXT: sw s0, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB45_3
+; RV32ZVE32F-NEXT: j .LBB45_2
; RV32ZVE32F-NEXT: .LBB45_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t4, 0(a0)
; RV32ZVE32F-NEXT: sw t5, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB45_4
+; RV32ZVE32F-NEXT: j .LBB45_3
; RV32ZVE32F-NEXT: .LBB45_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t2, 0(a0)
; RV32ZVE32F-NEXT: sw t3, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB45_5
+; RV32ZVE32F-NEXT: j .LBB45_4
; RV32ZVE32F-NEXT: .LBB45_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t0, 0(a0)
; RV32ZVE32F-NEXT: sw t1, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB45_6
+; RV32ZVE32F-NEXT: j .LBB45_5
; RV32ZVE32F-NEXT: .LBB45_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB45_7
+; RV32ZVE32F-NEXT: j .LBB45_6
; RV32ZVE32F-NEXT: .LBB45_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB45_8
-; RV32ZVE32F-NEXT: j .LBB45_9
+; RV32ZVE32F-NEXT: j .LBB45_7
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -4327,7 +4157,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB45_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB45_5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB45_13
; RV64ZVE32F-NEXT: .LBB45_6: # %else6
@@ -4336,7 +4166,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: .LBB45_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB45_9
-; RV64ZVE32F-NEXT: .LBB45_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
@@ -4346,41 +4176,36 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB45_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB45_10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB45_16
-; RV64ZVE32F-NEXT: .LBB45_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB45_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB45_6
+; RV64ZVE32F-NEXT: j .LBB45_5
; RV64ZVE32F-NEXT: .LBB45_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB45_7
+; RV64ZVE32F-NEXT: j .LBB45_6
; RV64ZVE32F-NEXT: .LBB45_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 32
-; RV64ZVE32F-NEXT: bnez a0, .LBB45_8
-; RV64ZVE32F-NEXT: j .LBB45_9
+; RV64ZVE32F-NEXT: j .LBB45_7
; RV64ZVE32F-NEXT: .LBB45_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, -128
-; RV64ZVE32F-NEXT: beqz a0, .LBB45_11
+; RV64ZVE32F-NEXT: j .LBB45_10
; RV64ZVE32F-NEXT: .LBB45_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4445,7 +4270,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccus.vx v10, s1, v8
; RV32ZVE32F-NEXT: bnez s2, .LBB46_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB46_1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB46_11
; RV32ZVE32F-NEXT: .LBB46_2: # %else2
@@ -4466,7 +4291,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: .LBB46_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB46_9
-; RV32ZVE32F-NEXT: .LBB46_8: # %cond.store13
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4490,57 +4315,49 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vmv.x.s s2, v10
; RV32ZVE32F-NEXT: sw s1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB46_2
+; RV32ZVE32F-NEXT: j .LBB46_1
; RV32ZVE32F-NEXT: .LBB46_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t6, 0(a0)
; RV32ZVE32F-NEXT: sw s0, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB46_3
+; RV32ZVE32F-NEXT: j .LBB46_2
; RV32ZVE32F-NEXT: .LBB46_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t4, 0(a0)
; RV32ZVE32F-NEXT: sw t5, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB46_4
+; RV32ZVE32F-NEXT: j .LBB46_3
; RV32ZVE32F-NEXT: .LBB46_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t2, 0(a0)
; RV32ZVE32F-NEXT: sw t3, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB46_5
+; RV32ZVE32F-NEXT: j .LBB46_4
; RV32ZVE32F-NEXT: .LBB46_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t0, 0(a0)
; RV32ZVE32F-NEXT: sw t1, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB46_6
+; RV32ZVE32F-NEXT: j .LBB46_5
; RV32ZVE32F-NEXT: .LBB46_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB46_7
+; RV32ZVE32F-NEXT: j .LBB46_6
; RV32ZVE32F-NEXT: .LBB46_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB46_8
-; RV32ZVE32F-NEXT: j .LBB46_9
+; RV32ZVE32F-NEXT: j .LBB46_7
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -4579,7 +4396,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB46_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB46_5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB46_13
; RV64ZVE32F-NEXT: .LBB46_6: # %else6
@@ -4588,7 +4405,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: .LBB46_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB46_9
-; RV64ZVE32F-NEXT: .LBB46_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
@@ -4598,41 +4415,36 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB46_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB46_10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB46_16
-; RV64ZVE32F-NEXT: .LBB46_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB46_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB46_6
+; RV64ZVE32F-NEXT: j .LBB46_5
; RV64ZVE32F-NEXT: .LBB46_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB46_7
+; RV64ZVE32F-NEXT: j .LBB46_6
; RV64ZVE32F-NEXT: .LBB46_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 32
-; RV64ZVE32F-NEXT: bnez a0, .LBB46_8
-; RV64ZVE32F-NEXT: j .LBB46_9
+; RV64ZVE32F-NEXT: j .LBB46_7
; RV64ZVE32F-NEXT: .LBB46_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, -128
-; RV64ZVE32F-NEXT: beqz a0, .LBB46_11
+; RV64ZVE32F-NEXT: j .LBB46_10
; RV64ZVE32F-NEXT: .LBB46_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4699,7 +4511,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccu.vx v10, s1, v8
; RV32ZVE32F-NEXT: bnez s2, .LBB47_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB47_1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB47_11
; RV32ZVE32F-NEXT: .LBB47_2: # %else2
@@ -4720,7 +4532,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: .LBB47_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB47_9
-; RV32ZVE32F-NEXT: .LBB47_8: # %cond.store13
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4744,57 +4556,49 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vmv.x.s s2, v10
; RV32ZVE32F-NEXT: sw s1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB47_2
+; RV32ZVE32F-NEXT: j .LBB47_1
; RV32ZVE32F-NEXT: .LBB47_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t6, 0(a0)
; RV32ZVE32F-NEXT: sw s0, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB47_3
+; RV32ZVE32F-NEXT: j .LBB47_2
; RV32ZVE32F-NEXT: .LBB47_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t4, 0(a0)
; RV32ZVE32F-NEXT: sw t5, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB47_4
+; RV32ZVE32F-NEXT: j .LBB47_3
; RV32ZVE32F-NEXT: .LBB47_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t2, 0(a0)
; RV32ZVE32F-NEXT: sw t3, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB47_5
+; RV32ZVE32F-NEXT: j .LBB47_4
; RV32ZVE32F-NEXT: .LBB47_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t0, 0(a0)
; RV32ZVE32F-NEXT: sw t1, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB47_6
+; RV32ZVE32F-NEXT: j .LBB47_5
; RV32ZVE32F-NEXT: .LBB47_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB47_7
+; RV32ZVE32F-NEXT: j .LBB47_6
; RV32ZVE32F-NEXT: .LBB47_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB47_8
-; RV32ZVE32F-NEXT: j .LBB47_9
+; RV32ZVE32F-NEXT: j .LBB47_7
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -4835,7 +4639,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB47_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB47_5: # %else4
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB47_13
; RV64ZVE32F-NEXT: .LBB47_6: # %else6
@@ -4844,7 +4648,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: .LBB47_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB47_9
-; RV64ZVE32F-NEXT: .LBB47_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 48
@@ -4855,10 +4659,10 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB47_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB47_10: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB47_16
-; RV64ZVE32F-NEXT: .LBB47_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB47_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4866,8 +4670,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB47_6
+; RV64ZVE32F-NEXT: j .LBB47_5
; RV64ZVE32F-NEXT: .LBB47_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4875,25 +4678,21 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB47_7
+; RV64ZVE32F-NEXT: j .LBB47_6
; RV64ZVE32F-NEXT: .LBB47_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 48
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 32
-; RV64ZVE32F-NEXT: bnez a0, .LBB47_8
-; RV64ZVE32F-NEXT: j .LBB47_9
+; RV64ZVE32F-NEXT: j .LBB47_7
; RV64ZVE32F-NEXT: .LBB47_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 48
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, -128
-; RV64ZVE32F-NEXT: beqz a0, .LBB47_11
+; RV64ZVE32F-NEXT: j .LBB47_10
; RV64ZVE32F-NEXT: .LBB47_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4959,7 +4758,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB48_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB48_1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB48_11
; RV32ZVE32F-NEXT: .LBB48_2: # %else2
@@ -4980,7 +4779,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV32ZVE32F-NEXT: .LBB48_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB48_9
-; RV32ZVE32F-NEXT: .LBB48_8: # %cond.store13
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -5003,57 +4802,49 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: andi a0, t0, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB48_2
+; RV32ZVE32F-NEXT: j .LBB48_1
; RV32ZVE32F-NEXT: .LBB48_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB48_3
+; RV32ZVE32F-NEXT: j .LBB48_2
; RV32ZVE32F-NEXT: .LBB48_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB48_4
+; RV32ZVE32F-NEXT: j .LBB48_3
; RV32ZVE32F-NEXT: .LBB48_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB48_5
+; RV32ZVE32F-NEXT: j .LBB48_4
; RV32ZVE32F-NEXT: .LBB48_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB48_6
+; RV32ZVE32F-NEXT: j .LBB48_5
; RV32ZVE32F-NEXT: .LBB48_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB48_7
+; RV32ZVE32F-NEXT: j .LBB48_6
; RV32ZVE32F-NEXT: .LBB48_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB48_8
-; RV32ZVE32F-NEXT: j .LBB48_9
+; RV32ZVE32F-NEXT: j .LBB48_7
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -5092,7 +4883,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB48_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB48_5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB48_13
; RV64ZVE32F-NEXT: .LBB48_6: # %else6
@@ -5101,7 +4892,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV64ZVE32F-NEXT: .LBB48_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB48_9
-; RV64ZVE32F-NEXT: .LBB48_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
@@ -5111,41 +4902,36 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB48_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB48_10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB48_16
-; RV64ZVE32F-NEXT: .LBB48_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB48_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB48_6
+; RV64ZVE32F-NEXT: j .LBB48_5
; RV64ZVE32F-NEXT: .LBB48_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB48_7
+; RV64ZVE32F-NEXT: j .LBB48_6
; RV64ZVE32F-NEXT: .LBB48_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 32
-; RV64ZVE32F-NEXT: bnez a0, .LBB48_8
-; RV64ZVE32F-NEXT: j .LBB48_9
+; RV64ZVE32F-NEXT: j .LBB48_7
; RV64ZVE32F-NEXT: .LBB48_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, -128
-; RV64ZVE32F-NEXT: beqz a0, .LBB48_11
+; RV64ZVE32F-NEXT: j .LBB48_10
; RV64ZVE32F-NEXT: .LBB48_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -5209,7 +4995,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB49_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB49_1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB49_11
; RV32ZVE32F-NEXT: .LBB49_2: # %else2
@@ -5230,7 +5016,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: .LBB49_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB49_9
-; RV32ZVE32F-NEXT: .LBB49_8: # %cond.store13
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -5253,57 +5039,49 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: andi a0, t0, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB49_2
+; RV32ZVE32F-NEXT: j .LBB49_1
; RV32ZVE32F-NEXT: .LBB49_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB49_3
+; RV32ZVE32F-NEXT: j .LBB49_2
; RV32ZVE32F-NEXT: .LBB49_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB49_4
+; RV32ZVE32F-NEXT: j .LBB49_3
; RV32ZVE32F-NEXT: .LBB49_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB49_5
+; RV32ZVE32F-NEXT: j .LBB49_4
; RV32ZVE32F-NEXT: .LBB49_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB49_6
+; RV32ZVE32F-NEXT: j .LBB49_5
; RV32ZVE32F-NEXT: .LBB49_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB49_7
+; RV32ZVE32F-NEXT: j .LBB49_6
; RV32ZVE32F-NEXT: .LBB49_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB49_8
-; RV32ZVE32F-NEXT: j .LBB49_9
+; RV32ZVE32F-NEXT: j .LBB49_7
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -5342,7 +5120,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB49_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB49_5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB49_13
; RV64ZVE32F-NEXT: .LBB49_6: # %else6
@@ -5351,7 +5129,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: .LBB49_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB49_9
-; RV64ZVE32F-NEXT: .LBB49_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
@@ -5361,41 +5139,36 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB49_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB49_10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB49_16
-; RV64ZVE32F-NEXT: .LBB49_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB49_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB49_6
+; RV64ZVE32F-NEXT: j .LBB49_5
; RV64ZVE32F-NEXT: .LBB49_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB49_7
+; RV64ZVE32F-NEXT: j .LBB49_6
; RV64ZVE32F-NEXT: .LBB49_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 32
-; RV64ZVE32F-NEXT: bnez a0, .LBB49_8
-; RV64ZVE32F-NEXT: j .LBB49_9
+; RV64ZVE32F-NEXT: j .LBB49_7
; RV64ZVE32F-NEXT: .LBB49_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, -128
-; RV64ZVE32F-NEXT: beqz a0, .LBB49_11
+; RV64ZVE32F-NEXT: j .LBB49_10
; RV64ZVE32F-NEXT: .LBB49_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -5460,7 +5233,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB50_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB50_1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB50_11
; RV32ZVE32F-NEXT: .LBB50_2: # %else2
@@ -5481,7 +5254,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: .LBB50_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB50_9
-; RV32ZVE32F-NEXT: .LBB50_8: # %cond.store13
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -5504,57 +5277,49 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: andi a0, t0, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB50_2
+; RV32ZVE32F-NEXT: j .LBB50_1
; RV32ZVE32F-NEXT: .LBB50_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB50_3
+; RV32ZVE32F-NEXT: j .LBB50_2
; RV32ZVE32F-NEXT: .LBB50_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB50_4
+; RV32ZVE32F-NEXT: j .LBB50_3
; RV32ZVE32F-NEXT: .LBB50_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB50_5
+; RV32ZVE32F-NEXT: j .LBB50_4
; RV32ZVE32F-NEXT: .LBB50_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB50_6
+; RV32ZVE32F-NEXT: j .LBB50_5
; RV32ZVE32F-NEXT: .LBB50_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB50_7
+; RV32ZVE32F-NEXT: j .LBB50_6
; RV32ZVE32F-NEXT: .LBB50_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, t0, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB50_8
-; RV32ZVE32F-NEXT: j .LBB50_9
+; RV32ZVE32F-NEXT: j .LBB50_7
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -5595,7 +5360,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB50_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB50_5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB50_13
; RV64ZVE32F-NEXT: .LBB50_6: # %else6
@@ -5604,7 +5369,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: .LBB50_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB50_9
-; RV64ZVE32F-NEXT: .LBB50_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 32
@@ -5615,10 +5380,10 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB50_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB50_10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB50_16
-; RV64ZVE32F-NEXT: .LBB50_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB50_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -5626,8 +5391,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB50_6
+; RV64ZVE32F-NEXT: j .LBB50_5
; RV64ZVE32F-NEXT: .LBB50_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -5635,25 +5399,21 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB50_7
+; RV64ZVE32F-NEXT: j .LBB50_6
; RV64ZVE32F-NEXT: .LBB50_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 32
-; RV64ZVE32F-NEXT: bnez a0, .LBB50_8
-; RV64ZVE32F-NEXT: j .LBB50_9
+; RV64ZVE32F-NEXT: j .LBB50_7
; RV64ZVE32F-NEXT: .LBB50_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, -128
-; RV64ZVE32F-NEXT: beqz a0, .LBB50_11
+; RV64ZVE32F-NEXT: j .LBB50_10
; RV64ZVE32F-NEXT: .LBB50_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -5748,7 +5508,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV32ZVE32F-NEXT: andi s2, a2, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB51_10
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB51_1: # %else
; RV32ZVE32F-NEXT: andi a0, a2, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB51_11
; RV32ZVE32F-NEXT: .LBB51_2: # %else2
@@ -5769,7 +5529,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV32ZVE32F-NEXT: .LBB51_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a2, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB51_9
-; RV32ZVE32F-NEXT: .LBB51_8: # %cond.store13
+; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -5806,57 +5566,49 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: andi a0, a2, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB51_2
+; RV32ZVE32F-NEXT: j .LBB51_1
; RV32ZVE32F-NEXT: .LBB51_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a2, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB51_3
+; RV32ZVE32F-NEXT: j .LBB51_2
; RV32ZVE32F-NEXT: .LBB51_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a2, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB51_4
+; RV32ZVE32F-NEXT: j .LBB51_3
; RV32ZVE32F-NEXT: .LBB51_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a2, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB51_5
+; RV32ZVE32F-NEXT: j .LBB51_4
; RV32ZVE32F-NEXT: .LBB51_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a2, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB51_6
+; RV32ZVE32F-NEXT: j .LBB51_5
; RV32ZVE32F-NEXT: .LBB51_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a7, 0(a0)
; RV32ZVE32F-NEXT: sw t0, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a2, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB51_7
+; RV32ZVE32F-NEXT: j .LBB51_6
; RV32ZVE32F-NEXT: .LBB51_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a5, 0(a0)
; RV32ZVE32F-NEXT: sw a6, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a2, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB51_8
-; RV32ZVE32F-NEXT: j .LBB51_9
+; RV32ZVE32F-NEXT: j .LBB51_7
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -5889,7 +5641,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV64ZVE32F-NEXT: vmv.x.s a7, v0
; RV64ZVE32F-NEXT: andi s3, a7, 1
; RV64ZVE32F-NEXT: bnez s3, .LBB51_10
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB51_1: # %else
; RV64ZVE32F-NEXT: andi a0, a7, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB51_11
; RV64ZVE32F-NEXT: .LBB51_2: # %else2
@@ -5910,7 +5662,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV64ZVE32F-NEXT: .LBB51_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a7, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB51_9
-; RV64ZVE32F-NEXT: .LBB51_8: # %cond.store13
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store13
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a1, a1, a6
; RV64ZVE32F-NEXT: sd a3, 0(a1)
@@ -5933,45 +5685,37 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a1, a2
; RV64ZVE32F-NEXT: sd a0, 0(a2)
-; RV64ZVE32F-NEXT: andi a0, a7, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB51_2
+; RV64ZVE32F-NEXT: j .LBB51_1
; RV64ZVE32F-NEXT: .LBB51_11: # %cond.store1
; RV64ZVE32F-NEXT: slli s2, s2, 3
; RV64ZVE32F-NEXT: add s2, a1, s2
; RV64ZVE32F-NEXT: sd s0, 0(s2)
-; RV64ZVE32F-NEXT: andi a0, a7, 4
-; RV64ZVE32F-NEXT: beqz a0, .LBB51_3
+; RV64ZVE32F-NEXT: j .LBB51_2
; RV64ZVE32F-NEXT: .LBB51_12: # %cond.store3
; RV64ZVE32F-NEXT: slli s1, s1, 3
; RV64ZVE32F-NEXT: add s1, a1, s1
; RV64ZVE32F-NEXT: sd t5, 0(s1)
-; RV64ZVE32F-NEXT: andi a0, a7, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB51_4
+; RV64ZVE32F-NEXT: j .LBB51_3
; RV64ZVE32F-NEXT: .LBB51_13: # %cond.store5
; RV64ZVE32F-NEXT: slli t6, t6, 3
; RV64ZVE32F-NEXT: add t6, a1, t6
; RV64ZVE32F-NEXT: sd t3, 0(t6)
-; RV64ZVE32F-NEXT: andi a0, a7, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB51_5
+; RV64ZVE32F-NEXT: j .LBB51_4
; RV64ZVE32F-NEXT: .LBB51_14: # %cond.store7
; RV64ZVE32F-NEXT: slli t4, t4, 3
; RV64ZVE32F-NEXT: add t4, a1, t4
; RV64ZVE32F-NEXT: sd t1, 0(t4)
-; RV64ZVE32F-NEXT: andi a0, a7, 32
-; RV64ZVE32F-NEXT: beqz a0, .LBB51_6
+; RV64ZVE32F-NEXT: j .LBB51_5
; RV64ZVE32F-NEXT: .LBB51_15: # %cond.store9
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a5, 0(t2)
-; RV64ZVE32F-NEXT: andi a0, a7, 64
-; RV64ZVE32F-NEXT: beqz a0, .LBB51_7
+; RV64ZVE32F-NEXT: j .LBB51_6
; RV64ZVE32F-NEXT: .LBB51_16: # %cond.store11
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: sd a4, 0(t0)
-; RV64ZVE32F-NEXT: andi a0, a7, -128
-; RV64ZVE32F-NEXT: bnez a0, .LBB51_8
-; RV64ZVE32F-NEXT: j .LBB51_9
+; RV64ZVE32F-NEXT: j .LBB51_7
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
@@ -6041,18 +5785,17 @@ define void @mscatter_v2bf16(<2 x bfloat> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB53_3
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB53_1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB53_4
-; RV64ZVE32F-NEXT: .LBB53_2: # %else2
+; RV64ZVE32F-NEXT: # %bb.2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB53_3: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
-; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB53_2
+; RV64ZVE32F-NEXT: j .LBB53_1
; RV64ZVE32F-NEXT: .LBB53_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -6088,7 +5831,7 @@ define void @mscatter_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB54_5
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB54_1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB54_6
; RV64ZVE32F-NEXT: .LBB54_2: # %else2
@@ -6097,7 +5840,7 @@ define void @mscatter_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB54_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB54_8
-; RV64ZVE32F-NEXT: .LBB54_4: # %else6
+; RV64ZVE32F-NEXT: # %bb.4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB54_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
@@ -6105,24 +5848,21 @@ define void @mscatter_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a5, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a5
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a3, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB54_2
+; RV64ZVE32F-NEXT: j .LBB54_1
; RV64ZVE32F-NEXT: .LBB54_6: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a4)
-; RV64ZVE32F-NEXT: andi a0, a3, 4
-; RV64ZVE32F-NEXT: beqz a0, .LBB54_3
+; RV64ZVE32F-NEXT: j .LBB54_2
; RV64ZVE32F-NEXT: .LBB54_7: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: andi a3, a3, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB54_4
+; RV64ZVE32F-NEXT: j .LBB54_3
; RV64ZVE32F-NEXT: .LBB54_8: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
@@ -6210,7 +5950,7 @@ define void @mscatter_v8bf16(<8 x bfloat> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB57_9
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB57_1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB57_10
; RV64ZVE32F-NEXT: .LBB57_2: # %else2
@@ -6231,7 +5971,7 @@ define void @mscatter_v8bf16(<8 x bfloat> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB57_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB57_16
-; RV64ZVE32F-NEXT: .LBB57_8: # %else14
+; RV64ZVE32F-NEXT: # %bb.8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB57_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
@@ -6239,56 +5979,49 @@ define void @mscatter_v8bf16(<8 x bfloat> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, t1
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB57_2
+; RV64ZVE32F-NEXT: j .LBB57_1
; RV64ZVE32F-NEXT: .LBB57_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(t0)
-; RV64ZVE32F-NEXT: andi a0, a4, 4
-; RV64ZVE32F-NEXT: beqz a0, .LBB57_3
+; RV64ZVE32F-NEXT: j .LBB57_2
; RV64ZVE32F-NEXT: .LBB57_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a7)
-; RV64ZVE32F-NEXT: andi a0, a4, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB57_4
+; RV64ZVE32F-NEXT: j .LBB57_3
; RV64ZVE32F-NEXT: .LBB57_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a6)
-; RV64ZVE32F-NEXT: andi a0, a4, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB57_5
+; RV64ZVE32F-NEXT: j .LBB57_4
; RV64ZVE32F-NEXT: .LBB57_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a5)
-; RV64ZVE32F-NEXT: andi a0, a4, 32
-; RV64ZVE32F-NEXT: beqz a0, .LBB57_6
+; RV64ZVE32F-NEXT: j .LBB57_5
; RV64ZVE32F-NEXT: .LBB57_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a3)
-; RV64ZVE32F-NEXT: andi a0, a4, 64
-; RV64ZVE32F-NEXT: beqz a0, .LBB57_7
+; RV64ZVE32F-NEXT: j .LBB57_6
; RV64ZVE32F-NEXT: .LBB57_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: andi a0, a4, -128
-; RV64ZVE32F-NEXT: beqz a0, .LBB57_8
+; RV64ZVE32F-NEXT: j .LBB57_7
; RV64ZVE32F-NEXT: .LBB57_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -6354,7 +6087,7 @@ define void @mscatter_baseidx_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8>
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB58_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB58_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB58_13
; RV64ZVE32F-NEXT: .LBB58_6: # %else6
@@ -6363,7 +6096,7 @@ define void @mscatter_baseidx_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8>
; RV64ZVE32F-NEXT: .LBB58_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB58_9
-; RV64ZVE32F-NEXT: .LBB58_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -6379,10 +6112,10 @@ define void @mscatter_baseidx_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8>
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB58_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB58_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB58_16
-; RV64ZVE32F-NEXT: .LBB58_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB58_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -6393,8 +6126,7 @@ define void @mscatter_baseidx_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8>
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB58_6
+; RV64ZVE32F-NEXT: j .LBB58_5
; RV64ZVE32F-NEXT: .LBB58_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -6406,8 +6138,7 @@ define void @mscatter_baseidx_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8>
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB58_7
+; RV64ZVE32F-NEXT: j .LBB58_6
; RV64ZVE32F-NEXT: .LBB58_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -6418,9 +6149,7 @@ define void @mscatter_baseidx_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8>
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB58_8
-; RV64ZVE32F-NEXT: j .LBB58_9
+; RV64ZVE32F-NEXT: j .LBB58_7
; RV64ZVE32F-NEXT: .LBB58_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
@@ -6430,8 +6159,7 @@ define void @mscatter_baseidx_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8>
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB58_11
+; RV64ZVE32F-NEXT: j .LBB58_10
; RV64ZVE32F-NEXT: .LBB58_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -6505,7 +6233,7 @@ define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB59_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB59_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB59_13
; RV64ZVE32F-NEXT: .LBB59_6: # %else6
@@ -6514,7 +6242,7 @@ define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: .LBB59_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB59_9
-; RV64ZVE32F-NEXT: .LBB59_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -6530,10 +6258,10 @@ define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB59_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB59_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB59_16
-; RV64ZVE32F-NEXT: .LBB59_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB59_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -6544,8 +6272,7 @@ define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB59_6
+; RV64ZVE32F-NEXT: j .LBB59_5
; RV64ZVE32F-NEXT: .LBB59_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -6557,8 +6284,7 @@ define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB59_7
+; RV64ZVE32F-NEXT: j .LBB59_6
; RV64ZVE32F-NEXT: .LBB59_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -6569,9 +6295,7 @@ define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB59_8
-; RV64ZVE32F-NEXT: j .LBB59_9
+; RV64ZVE32F-NEXT: j .LBB59_7
; RV64ZVE32F-NEXT: .LBB59_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
@@ -6581,8 +6305,7 @@ define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB59_11
+; RV64ZVE32F-NEXT: j .LBB59_10
; RV64ZVE32F-NEXT: .LBB59_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -6657,7 +6380,7 @@ define void @mscatter_baseidx_zext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB60_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB60_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB60_13
; RV64ZVE32F-NEXT: .LBB60_6: # %else6
@@ -6666,7 +6389,7 @@ define void @mscatter_baseidx_zext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: .LBB60_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB60_9
-; RV64ZVE32F-NEXT: .LBB60_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -6683,10 +6406,10 @@ define void @mscatter_baseidx_zext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB60_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB60_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB60_16
-; RV64ZVE32F-NEXT: .LBB60_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB60_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -6698,8 +6421,7 @@ define void @mscatter_baseidx_zext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB60_6
+; RV64ZVE32F-NEXT: j .LBB60_5
; RV64ZVE32F-NEXT: .LBB60_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -6712,8 +6434,7 @@ define void @mscatter_baseidx_zext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB60_7
+; RV64ZVE32F-NEXT: j .LBB60_6
; RV64ZVE32F-NEXT: .LBB60_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -6725,9 +6446,7 @@ define void @mscatter_baseidx_zext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB60_8
-; RV64ZVE32F-NEXT: j .LBB60_9
+; RV64ZVE32F-NEXT: j .LBB60_7
; RV64ZVE32F-NEXT: .LBB60_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
@@ -6738,8 +6457,7 @@ define void @mscatter_baseidx_zext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB60_11
+; RV64ZVE32F-NEXT: j .LBB60_10
; RV64ZVE32F-NEXT: .LBB60_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -6813,7 +6531,7 @@ define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %id
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB61_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB61_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB61_13
; RV64ZVE32F-NEXT: .LBB61_6: # %else6
@@ -6822,7 +6540,7 @@ define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %id
; RV64ZVE32F-NEXT: .LBB61_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB61_9
-; RV64ZVE32F-NEXT: .LBB61_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -6838,10 +6556,10 @@ define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %id
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB61_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB61_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB61_16
-; RV64ZVE32F-NEXT: .LBB61_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB61_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -6852,8 +6570,7 @@ define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %id
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB61_6
+; RV64ZVE32F-NEXT: j .LBB61_5
; RV64ZVE32F-NEXT: .LBB61_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -6865,8 +6582,7 @@ define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %id
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB61_7
+; RV64ZVE32F-NEXT: j .LBB61_6
; RV64ZVE32F-NEXT: .LBB61_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -6876,9 +6592,7 @@ define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %id
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB61_8
-; RV64ZVE32F-NEXT: j .LBB61_9
+; RV64ZVE32F-NEXT: j .LBB61_7
; RV64ZVE32F-NEXT: .LBB61_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
@@ -6888,8 +6602,7 @@ define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %id
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB61_11
+; RV64ZVE32F-NEXT: j .LBB61_10
; RV64ZVE32F-NEXT: .LBB61_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -6982,16 +6695,15 @@ define void @mscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-ZVFH-NEXT: andi a3, a2, 1
; RV64ZVE32F-ZVFH-NEXT: bnez a3, .LBB63_3
-; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFH-NEXT: .LBB63_1: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB63_4
-; RV64ZVE32F-ZVFH-NEXT: .LBB63_2: # %else2
+; RV64ZVE32F-ZVFH-NEXT: # %bb.2: # %else2
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB63_3: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB63_2
+; RV64ZVE32F-ZVFH-NEXT: j .LBB63_1
; RV64ZVE32F-ZVFH-NEXT: .LBB63_4: # %cond.store1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
@@ -7004,18 +6716,17 @@ define void @mscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a3, .LBB63_3
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_1: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB63_4
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_2: # %else2
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.2: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_3: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB63_2
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB63_1
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_4: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
@@ -7051,7 +6762,7 @@ define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-ZVFH-NEXT: andi a5, a3, 1
; RV64ZVE32F-ZVFH-NEXT: bnez a5, .LBB64_5
-; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFH-NEXT: .LBB64_1: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB64_6
; RV64ZVE32F-ZVFH-NEXT: .LBB64_2: # %else2
@@ -7060,26 +6771,23 @@ define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-ZVFH-NEXT: .LBB64_3: # %else4
; RV64ZVE32F-ZVFH-NEXT: andi a3, a3, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a3, .LBB64_8
-; RV64ZVE32F-ZVFH-NEXT: .LBB64_4: # %else6
+; RV64ZVE32F-ZVFH-NEXT: # %bb.4: # %else6
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB64_5: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: ld a0, 0(a0)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
-; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 2
-; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB64_2
+; RV64ZVE32F-ZVFH-NEXT: j .LBB64_1
; RV64ZVE32F-ZVFH-NEXT: .LBB64_6: # %cond.store1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a4)
-; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 4
-; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB64_3
+; RV64ZVE32F-ZVFH-NEXT: j .LBB64_2
; RV64ZVE32F-ZVFH-NEXT: .LBB64_7: # %cond.store3
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: andi a3, a3, 8
-; RV64ZVE32F-ZVFH-NEXT: beqz a3, .LBB64_4
+; RV64ZVE32F-ZVFH-NEXT: j .LBB64_3
; RV64ZVE32F-ZVFH-NEXT: .LBB64_8: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 3
@@ -7095,7 +6803,7 @@ define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a5, a3, 1
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a5, .LBB64_5
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_1: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB64_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_2: # %else2
@@ -7104,7 +6812,7 @@ define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_3: # %else4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a3, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a3, .LBB64_8
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_4: # %else6
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.4: # %else6
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_5: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 0(a0)
@@ -7112,24 +6820,21 @@ define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a5, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a5
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB64_2
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB64_1
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_6: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a4)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 4
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB64_3
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB64_2
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_7: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a3, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a3, .LBB64_4
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB64_3
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_8: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
@@ -7233,7 +6938,7 @@ define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-ZVFH-NEXT: andi t1, a4, 1
; RV64ZVE32F-ZVFH-NEXT: bnez t1, .LBB67_9
-; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFH-NEXT: .LBB67_1: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_10
; RV64ZVE32F-ZVFH-NEXT: .LBB67_2: # %else2
@@ -7254,50 +6959,43 @@ define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-ZVFH-NEXT: .LBB67_7: # %else12
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_16
-; RV64ZVE32F-ZVFH-NEXT: .LBB67_8: # %else14
+; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %else14
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB67_9: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: ld a0, 0(a0)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
-; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 2
-; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_2
+; RV64ZVE32F-ZVFH-NEXT: j .LBB67_1
; RV64ZVE32F-ZVFH-NEXT: .LBB67_10: # %cond.store1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (t0)
-; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 4
-; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_3
+; RV64ZVE32F-ZVFH-NEXT: j .LBB67_2
; RV64ZVE32F-ZVFH-NEXT: .LBB67_11: # %cond.store3
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a7)
-; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 8
-; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_4
+; RV64ZVE32F-ZVFH-NEXT: j .LBB67_3
; RV64ZVE32F-ZVFH-NEXT: .LBB67_12: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a6)
-; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 16
-; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_5
+; RV64ZVE32F-ZVFH-NEXT: j .LBB67_4
; RV64ZVE32F-ZVFH-NEXT: .LBB67_13: # %cond.store7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a5)
-; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 32
-; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_6
+; RV64ZVE32F-ZVFH-NEXT: j .LBB67_5
; RV64ZVE32F-ZVFH-NEXT: .LBB67_14: # %cond.store9
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a3)
-; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 64
-; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_7
+; RV64ZVE32F-ZVFH-NEXT: j .LBB67_6
; RV64ZVE32F-ZVFH-NEXT: .LBB67_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, -128
-; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_8
+; RV64ZVE32F-ZVFH-NEXT: j .LBB67_7
; RV64ZVE32F-ZVFH-NEXT: .LBB67_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7
@@ -7317,7 +7015,7 @@ define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi t1, a4, 1
; RV64ZVE32F-ZVFHMIN-NEXT: bnez t1, .LBB67_9
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_1: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_10
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_2: # %else2
@@ -7338,7 +7036,7 @@ define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_7: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_16
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_8: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_9: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 0(a0)
@@ -7346,56 +7044,49 @@ define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, t1
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_2
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB67_1
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_10: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(t0)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 4
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_3
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB67_2
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_11: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a7)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_4
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB67_3
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_12: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a6)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 16
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_5
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB67_4
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_13: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a5)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 32
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_6
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB67_5
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_14: # %cond.store9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a3)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 64
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_7
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB67_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, -128
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_8
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB67_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
@@ -7457,7 +7148,7 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_12
-; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_5: # %else4
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_13
; RV64ZVE32F-ZVFH-NEXT: .LBB68_6: # %else6
@@ -7466,7 +7157,7 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFH-NEXT: .LBB68_7: # %else8
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_9
-; RV64ZVE32F-ZVFH-NEXT: .LBB68_8: # %cond.store9
+; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
@@ -7480,10 +7171,10 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_15
-; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_10: # %else12
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB68_16
-; RV64ZVE32F-ZVFH-NEXT: .LBB68_11: # %else14
+; RV64ZVE32F-ZVFH-NEXT: # %bb.11: # %else14
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB68_12: # %cond.store3
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
@@ -7492,8 +7183,7 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_6
+; RV64ZVE32F-ZVFH-NEXT: j .LBB68_5
; RV64ZVE32F-ZVFH-NEXT: .LBB68_13: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
@@ -7503,8 +7193,7 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_7
+; RV64ZVE32F-ZVFH-NEXT: j .LBB68_6
; RV64ZVE32F-ZVFH-NEXT: .LBB68_14: # %cond.store7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
@@ -7513,9 +7202,7 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_8
-; RV64ZVE32F-ZVFH-NEXT: j .LBB68_9
+; RV64ZVE32F-ZVFH-NEXT: j .LBB68_7
; RV64ZVE32F-ZVFH-NEXT: .LBB68_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
@@ -7523,8 +7210,7 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB68_11
+; RV64ZVE32F-ZVFH-NEXT: j .LBB68_10
; RV64ZVE32F-ZVFH-NEXT: .LBB68_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
@@ -7571,7 +7257,7 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_12
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_5: # %else4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_13
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_6: # %else6
@@ -7580,7 +7266,7 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_7: # %else8
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_9
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_8: # %cond.store9
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
@@ -7596,10 +7282,10 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_15
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_10: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB68_16
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_11: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.11: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_12: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
@@ -7610,8 +7296,7 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_6
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB68_5
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_13: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
@@ -7623,8 +7308,7 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_7
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB68_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_14: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
@@ -7635,9 +7319,7 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_8
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB68_9
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB68_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
@@ -7647,8 +7329,7 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB68_11
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB68_10
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
@@ -7718,7 +7399,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_12
-; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_5: # %else4
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_13
; RV64ZVE32F-ZVFH-NEXT: .LBB69_6: # %else6
@@ -7727,7 +7408,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: .LBB69_7: # %else8
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_9
-; RV64ZVE32F-ZVFH-NEXT: .LBB69_8: # %cond.store9
+; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
@@ -7741,10 +7422,10 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_15
-; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_10: # %else12
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB69_16
-; RV64ZVE32F-ZVFH-NEXT: .LBB69_11: # %else14
+; RV64ZVE32F-ZVFH-NEXT: # %bb.11: # %else14
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB69_12: # %cond.store3
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
@@ -7753,8 +7434,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_6
+; RV64ZVE32F-ZVFH-NEXT: j .LBB69_5
; RV64ZVE32F-ZVFH-NEXT: .LBB69_13: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
@@ -7764,8 +7444,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_7
+; RV64ZVE32F-ZVFH-NEXT: j .LBB69_6
; RV64ZVE32F-ZVFH-NEXT: .LBB69_14: # %cond.store7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
@@ -7774,9 +7453,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_8
-; RV64ZVE32F-ZVFH-NEXT: j .LBB69_9
+; RV64ZVE32F-ZVFH-NEXT: j .LBB69_7
; RV64ZVE32F-ZVFH-NEXT: .LBB69_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
@@ -7784,8 +7461,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB69_11
+; RV64ZVE32F-ZVFH-NEXT: j .LBB69_10
; RV64ZVE32F-ZVFH-NEXT: .LBB69_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
@@ -7832,7 +7508,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_12
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_5: # %else4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_13
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_6: # %else6
@@ -7841,7 +7517,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_7: # %else8
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_9
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_8: # %cond.store9
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
@@ -7857,10 +7533,10 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_15
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_10: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB69_16
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_11: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.11: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_12: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
@@ -7871,8 +7547,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_6
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB69_5
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_13: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
@@ -7884,8 +7559,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_7
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB69_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_14: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
@@ -7896,9 +7570,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_8
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB69_9
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB69_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
@@ -7908,8 +7580,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB69_11
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB69_10
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
@@ -7980,7 +7651,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_12
-; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_5: # %else4
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_13
; RV64ZVE32F-ZVFH-NEXT: .LBB70_6: # %else6
@@ -7989,7 +7660,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: .LBB70_7: # %else8
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_9
-; RV64ZVE32F-ZVFH-NEXT: .LBB70_8: # %cond.store9
+; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
@@ -8004,10 +7675,10 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_15
-; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_10: # %else12
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB70_16
-; RV64ZVE32F-ZVFH-NEXT: .LBB70_11: # %else14
+; RV64ZVE32F-ZVFH-NEXT: # %bb.11: # %else14
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB70_12: # %cond.store3
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
@@ -8017,8 +7688,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_6
+; RV64ZVE32F-ZVFH-NEXT: j .LBB70_5
; RV64ZVE32F-ZVFH-NEXT: .LBB70_13: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
@@ -8029,8 +7699,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_7
+; RV64ZVE32F-ZVFH-NEXT: j .LBB70_6
; RV64ZVE32F-ZVFH-NEXT: .LBB70_14: # %cond.store7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
@@ -8040,9 +7709,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_8
-; RV64ZVE32F-ZVFH-NEXT: j .LBB70_9
+; RV64ZVE32F-ZVFH-NEXT: j .LBB70_7
; RV64ZVE32F-ZVFH-NEXT: .LBB70_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
@@ -8051,8 +7718,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB70_11
+; RV64ZVE32F-ZVFH-NEXT: j .LBB70_10
; RV64ZVE32F-ZVFH-NEXT: .LBB70_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
@@ -8102,7 +7768,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_12
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_5: # %else4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_13
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_6: # %else6
@@ -8111,7 +7777,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_7: # %else8
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_9
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_8: # %cond.store9
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
@@ -8128,10 +7794,10 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_15
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_10: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_16
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_11: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.11: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_12: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
@@ -8143,8 +7809,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_6
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_5
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_13: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
@@ -8157,8 +7822,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_7
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_14: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
@@ -8170,9 +7834,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_8
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_9
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
@@ -8183,8 +7845,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB70_11
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_10
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
@@ -8254,7 +7915,7 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_12
-; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_5: # %else4
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_13
; RV64ZVE32F-ZVFH-NEXT: .LBB71_6: # %else6
@@ -8263,7 +7924,7 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFH-NEXT: .LBB71_7: # %else8
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_9
-; RV64ZVE32F-ZVFH-NEXT: .LBB71_8: # %cond.store9
+; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
@@ -8277,10 +7938,10 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_15
-; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_10: # %else12
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB71_16
-; RV64ZVE32F-ZVFH-NEXT: .LBB71_11: # %else14
+; RV64ZVE32F-ZVFH-NEXT: # %bb.11: # %else14
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB71_12: # %cond.store3
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
@@ -8289,8 +7950,7 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_6
+; RV64ZVE32F-ZVFH-NEXT: j .LBB71_5
; RV64ZVE32F-ZVFH-NEXT: .LBB71_13: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
@@ -8300,8 +7960,7 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_7
+; RV64ZVE32F-ZVFH-NEXT: j .LBB71_6
; RV64ZVE32F-ZVFH-NEXT: .LBB71_14: # %cond.store7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
@@ -8309,9 +7968,7 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_8
-; RV64ZVE32F-ZVFH-NEXT: j .LBB71_9
+; RV64ZVE32F-ZVFH-NEXT: j .LBB71_7
; RV64ZVE32F-ZVFH-NEXT: .LBB71_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
@@ -8319,8 +7976,7 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB71_11
+; RV64ZVE32F-ZVFH-NEXT: j .LBB71_10
; RV64ZVE32F-ZVFH-NEXT: .LBB71_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
@@ -8367,7 +8023,7 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_12
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_5: # %else4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_13
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_6: # %else6
@@ -8376,7 +8032,7 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_7: # %else8
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_9
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_8: # %cond.store9
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
@@ -8392,10 +8048,10 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_15
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_10: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB71_16
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_11: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.11: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_12: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
@@ -8406,8 +8062,7 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_6
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB71_5
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_13: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
@@ -8419,8 +8074,7 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_7
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB71_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_14: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
@@ -8430,9 +8084,7 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_8
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB71_9
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB71_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
@@ -8442,8 +8094,7 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB71_11
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB71_10
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
@@ -8523,16 +8174,15 @@ define void @mscatter_v2f32(<2 x float> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB73_3
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB73_1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB73_4
-; RV64ZVE32F-NEXT: .LBB73_2: # %else2
+; RV64ZVE32F-NEXT: # %bb.2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB73_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB73_2
+; RV64ZVE32F-NEXT: j .LBB73_1
; RV64ZVE32F-NEXT: .LBB73_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -8566,7 +8216,7 @@ define void @mscatter_v4f32(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB74_5
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB74_1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB74_6
; RV64ZVE32F-NEXT: .LBB74_2: # %else2
@@ -8575,26 +8225,23 @@ define void @mscatter_v4f32(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB74_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB74_8
-; RV64ZVE32F-NEXT: .LBB74_4: # %else6
+; RV64ZVE32F-NEXT: # %bb.4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB74_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a0, a3, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB74_2
+; RV64ZVE32F-NEXT: j .LBB74_1
; RV64ZVE32F-NEXT: .LBB74_6: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse32.v v9, (a4)
-; RV64ZVE32F-NEXT: andi a0, a3, 4
-; RV64ZVE32F-NEXT: beqz a0, .LBB74_3
+; RV64ZVE32F-NEXT: j .LBB74_2
; RV64ZVE32F-NEXT: .LBB74_7: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse32.v v9, (a2)
-; RV64ZVE32F-NEXT: andi a3, a3, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB74_4
+; RV64ZVE32F-NEXT: j .LBB74_3
; RV64ZVE32F-NEXT: .LBB74_8: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
@@ -8672,7 +8319,7 @@ define void @mscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB77_9
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB77_1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB77_10
; RV64ZVE32F-NEXT: .LBB77_2: # %else2
@@ -8693,53 +8340,46 @@ define void @mscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB77_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB77_16
-; RV64ZVE32F-NEXT: .LBB77_8: # %else14
+; RV64ZVE32F-NEXT: # %bb.8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB77_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB77_2
+; RV64ZVE32F-NEXT: j .LBB77_1
; RV64ZVE32F-NEXT: .LBB77_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vse32.v v10, (t0)
-; RV64ZVE32F-NEXT: andi a0, a4, 4
-; RV64ZVE32F-NEXT: beqz a0, .LBB77_3
+; RV64ZVE32F-NEXT: j .LBB77_2
; RV64ZVE32F-NEXT: .LBB77_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV64ZVE32F-NEXT: vse32.v v10, (a7)
-; RV64ZVE32F-NEXT: andi a0, a4, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB77_4
+; RV64ZVE32F-NEXT: j .LBB77_3
; RV64ZVE32F-NEXT: .LBB77_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a6)
-; RV64ZVE32F-NEXT: andi a0, a4, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB77_5
+; RV64ZVE32F-NEXT: j .LBB77_4
; RV64ZVE32F-NEXT: .LBB77_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a5)
-; RV64ZVE32F-NEXT: andi a0, a4, 32
-; RV64ZVE32F-NEXT: beqz a0, .LBB77_6
+; RV64ZVE32F-NEXT: j .LBB77_5
; RV64ZVE32F-NEXT: .LBB77_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a3)
-; RV64ZVE32F-NEXT: andi a0, a4, 64
-; RV64ZVE32F-NEXT: beqz a0, .LBB77_7
+; RV64ZVE32F-NEXT: j .LBB77_6
; RV64ZVE32F-NEXT: .LBB77_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a0, a4, -128
-; RV64ZVE32F-NEXT: beqz a0, .LBB77_8
+; RV64ZVE32F-NEXT: j .LBB77_7
; RV64ZVE32F-NEXT: .LBB77_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -8799,7 +8439,7 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB78_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB78_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB78_13
; RV64ZVE32F-NEXT: .LBB78_6: # %else6
@@ -8808,7 +8448,7 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV64ZVE32F-NEXT: .LBB78_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB78_9
-; RV64ZVE32F-NEXT: .LBB78_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -8823,10 +8463,10 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB78_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB78_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB78_16
-; RV64ZVE32F-NEXT: .LBB78_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB78_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -8836,8 +8476,7 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB78_6
+; RV64ZVE32F-NEXT: j .LBB78_5
; RV64ZVE32F-NEXT: .LBB78_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -8847,8 +8486,7 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB78_7
+; RV64ZVE32F-NEXT: j .LBB78_6
; RV64ZVE32F-NEXT: .LBB78_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -8858,9 +8496,7 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB78_8
-; RV64ZVE32F-NEXT: j .LBB78_9
+; RV64ZVE32F-NEXT: j .LBB78_7
; RV64ZVE32F-NEXT: .LBB78_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -8869,8 +8505,7 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB78_11
+; RV64ZVE32F-NEXT: j .LBB78_10
; RV64ZVE32F-NEXT: .LBB78_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -8936,7 +8571,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB79_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB79_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB79_13
; RV64ZVE32F-NEXT: .LBB79_6: # %else6
@@ -8945,7 +8580,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: .LBB79_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB79_9
-; RV64ZVE32F-NEXT: .LBB79_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -8960,10 +8595,10 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB79_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB79_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB79_16
-; RV64ZVE32F-NEXT: .LBB79_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB79_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -8973,8 +8608,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB79_6
+; RV64ZVE32F-NEXT: j .LBB79_5
; RV64ZVE32F-NEXT: .LBB79_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -8984,8 +8618,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB79_7
+; RV64ZVE32F-NEXT: j .LBB79_6
; RV64ZVE32F-NEXT: .LBB79_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -8995,9 +8628,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB79_8
-; RV64ZVE32F-NEXT: j .LBB79_9
+; RV64ZVE32F-NEXT: j .LBB79_7
; RV64ZVE32F-NEXT: .LBB79_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -9006,8 +8637,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB79_11
+; RV64ZVE32F-NEXT: j .LBB79_10
; RV64ZVE32F-NEXT: .LBB79_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -9077,7 +8707,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB80_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB80_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB80_13
; RV64ZVE32F-NEXT: .LBB80_6: # %else6
@@ -9086,7 +8716,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: .LBB80_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB80_9
-; RV64ZVE32F-NEXT: .LBB80_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -9102,10 +8732,10 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB80_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB80_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB80_16
-; RV64ZVE32F-NEXT: .LBB80_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB80_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -9116,8 +8746,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB80_6
+; RV64ZVE32F-NEXT: j .LBB80_5
; RV64ZVE32F-NEXT: .LBB80_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -9128,8 +8757,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB80_7
+; RV64ZVE32F-NEXT: j .LBB80_6
; RV64ZVE32F-NEXT: .LBB80_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -9140,9 +8768,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB80_8
-; RV64ZVE32F-NEXT: j .LBB80_9
+; RV64ZVE32F-NEXT: j .LBB80_7
; RV64ZVE32F-NEXT: .LBB80_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
@@ -9152,8 +8778,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB80_11
+; RV64ZVE32F-NEXT: j .LBB80_10
; RV64ZVE32F-NEXT: .LBB80_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -9223,7 +8848,7 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB81_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB81_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB81_13
; RV64ZVE32F-NEXT: .LBB81_6: # %else6
@@ -9232,7 +8857,7 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: .LBB81_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB81_9
-; RV64ZVE32F-NEXT: .LBB81_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -9247,10 +8872,10 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB81_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB81_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB81_16
-; RV64ZVE32F-NEXT: .LBB81_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB81_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -9260,8 +8885,7 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB81_6
+; RV64ZVE32F-NEXT: j .LBB81_5
; RV64ZVE32F-NEXT: .LBB81_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -9271,8 +8895,7 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB81_7
+; RV64ZVE32F-NEXT: j .LBB81_6
; RV64ZVE32F-NEXT: .LBB81_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -9282,9 +8905,7 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB81_8
-; RV64ZVE32F-NEXT: j .LBB81_9
+; RV64ZVE32F-NEXT: j .LBB81_7
; RV64ZVE32F-NEXT: .LBB81_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -9293,8 +8914,7 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB81_11
+; RV64ZVE32F-NEXT: j .LBB81_10
; RV64ZVE32F-NEXT: .LBB81_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -9362,7 +8982,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB82_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB82_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB82_13
; RV64ZVE32F-NEXT: .LBB82_6: # %else6
@@ -9371,7 +8991,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: .LBB82_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB82_9
-; RV64ZVE32F-NEXT: .LBB82_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -9386,10 +9006,10 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB82_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB82_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB82_16
-; RV64ZVE32F-NEXT: .LBB82_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB82_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -9399,8 +9019,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB82_6
+; RV64ZVE32F-NEXT: j .LBB82_5
; RV64ZVE32F-NEXT: .LBB82_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -9410,8 +9029,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB82_7
+; RV64ZVE32F-NEXT: j .LBB82_6
; RV64ZVE32F-NEXT: .LBB82_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -9421,9 +9039,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB82_8
-; RV64ZVE32F-NEXT: j .LBB82_9
+; RV64ZVE32F-NEXT: j .LBB82_7
; RV64ZVE32F-NEXT: .LBB82_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -9432,8 +9048,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB82_11
+; RV64ZVE32F-NEXT: j .LBB82_10
; RV64ZVE32F-NEXT: .LBB82_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -9504,7 +9119,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB83_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB83_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB83_13
; RV64ZVE32F-NEXT: .LBB83_6: # %else6
@@ -9513,7 +9128,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: .LBB83_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB83_9
-; RV64ZVE32F-NEXT: .LBB83_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -9529,10 +9144,10 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB83_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB83_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB83_16
-; RV64ZVE32F-NEXT: .LBB83_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB83_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -9543,8 +9158,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB83_6
+; RV64ZVE32F-NEXT: j .LBB83_5
; RV64ZVE32F-NEXT: .LBB83_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -9555,8 +9169,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB83_7
+; RV64ZVE32F-NEXT: j .LBB83_6
; RV64ZVE32F-NEXT: .LBB83_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -9567,9 +9180,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB83_8
-; RV64ZVE32F-NEXT: j .LBB83_9
+; RV64ZVE32F-NEXT: j .LBB83_7
; RV64ZVE32F-NEXT: .LBB83_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 48
@@ -9579,8 +9190,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB83_11
+; RV64ZVE32F-NEXT: j .LBB83_10
; RV64ZVE32F-NEXT: .LBB83_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -9645,7 +9255,7 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB84_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB84_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB84_13
; RV64ZVE32F-NEXT: .LBB84_6: # %else6
@@ -9654,7 +9264,7 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: .LBB84_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB84_9
-; RV64ZVE32F-NEXT: .LBB84_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -9669,10 +9279,10 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB84_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB84_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB84_16
-; RV64ZVE32F-NEXT: .LBB84_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB84_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -9681,8 +9291,7 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB84_6
+; RV64ZVE32F-NEXT: j .LBB84_5
; RV64ZVE32F-NEXT: .LBB84_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -9691,8 +9300,7 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB84_7
+; RV64ZVE32F-NEXT: j .LBB84_6
; RV64ZVE32F-NEXT: .LBB84_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
@@ -9701,9 +9309,7 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB84_8
-; RV64ZVE32F-NEXT: j .LBB84_9
+; RV64ZVE32F-NEXT: j .LBB84_7
; RV64ZVE32F-NEXT: .LBB84_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -9712,8 +9318,7 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB84_11
+; RV64ZVE32F-NEXT: j .LBB84_10
; RV64ZVE32F-NEXT: .LBB84_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -9791,17 +9396,16 @@ define void @mscatter_v2f64(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s a0, v0
; RV32ZVE32F-NEXT: andi a1, a0, 1
; RV32ZVE32F-NEXT: bnez a1, .LBB86_3
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB86_1: # %else
; RV32ZVE32F-NEXT: andi a0, a0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB86_4
-; RV32ZVE32F-NEXT: .LBB86_2: # %else2
+; RV32ZVE32F-NEXT: # %bb.2: # %else2
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB86_3: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
-; RV32ZVE32F-NEXT: andi a0, a0, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB86_2
+; RV32ZVE32F-NEXT: j .LBB86_1
; RV32ZVE32F-NEXT: .LBB86_4: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -9815,15 +9419,14 @@ define void @mscatter_v2f64(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB86_3
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB86_1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB86_4
-; RV64ZVE32F-NEXT: .LBB86_2: # %else2
+; RV64ZVE32F-NEXT: # %bb.2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB86_3: # %cond.store
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB86_2
+; RV64ZVE32F-NEXT: j .LBB86_1
; RV64ZVE32F-NEXT: .LBB86_4: # %cond.store1
; RV64ZVE32F-NEXT: fsd fa1, 0(a1)
; RV64ZVE32F-NEXT: ret
@@ -9852,7 +9455,7 @@ define void @mscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s a0, v0
; RV32ZVE32F-NEXT: andi a1, a0, 1
; RV32ZVE32F-NEXT: bnez a1, .LBB87_5
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB87_1: # %else
; RV32ZVE32F-NEXT: andi a1, a0, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB87_6
; RV32ZVE32F-NEXT: .LBB87_2: # %else2
@@ -9861,28 +9464,25 @@ define void @mscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32ZVE32F-NEXT: .LBB87_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB87_8
-; RV32ZVE32F-NEXT: .LBB87_4: # %else6
+; RV32ZVE32F-NEXT: # %bb.4: # %else6
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB87_5: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB87_2
+; RV32ZVE32F-NEXT: j .LBB87_1
; RV32ZVE32F-NEXT: .LBB87_6: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v9
; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB87_3
+; RV32ZVE32F-NEXT: j .LBB87_2
; RV32ZVE32F-NEXT: .LBB87_7: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v9
; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
-; RV32ZVE32F-NEXT: andi a0, a0, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB87_4
+; RV32ZVE32F-NEXT: j .LBB87_3
; RV32ZVE32F-NEXT: .LBB87_8: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
@@ -9899,7 +9499,7 @@ define void @mscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB87_5
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB87_1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB87_6
; RV64ZVE32F-NEXT: .LBB87_2: # %else2
@@ -9908,21 +9508,18 @@ define void @mscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB87_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB87_8
-; RV64ZVE32F-NEXT: .LBB87_4: # %else6
+; RV64ZVE32F-NEXT: # %bb.4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB87_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a3, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB87_2
+; RV64ZVE32F-NEXT: j .LBB87_1
; RV64ZVE32F-NEXT: .LBB87_6: # %cond.store1
; RV64ZVE32F-NEXT: fsd fa1, 0(a4)
-; RV64ZVE32F-NEXT: andi a0, a3, 4
-; RV64ZVE32F-NEXT: beqz a0, .LBB87_3
+; RV64ZVE32F-NEXT: j .LBB87_2
; RV64ZVE32F-NEXT: .LBB87_7: # %cond.store3
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: andi a3, a3, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB87_4
+; RV64ZVE32F-NEXT: j .LBB87_3
; RV64ZVE32F-NEXT: .LBB87_8: # %cond.store5
; RV64ZVE32F-NEXT: fsd fa3, 0(a1)
; RV64ZVE32F-NEXT: ret
@@ -10003,7 +9600,7 @@ define void @mscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s a0, v0
; RV32ZVE32F-NEXT: andi a1, a0, 1
; RV32ZVE32F-NEXT: bnez a1, .LBB90_9
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB90_1: # %else
; RV32ZVE32F-NEXT: andi a1, a0, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB90_10
; RV32ZVE32F-NEXT: .LBB90_2: # %else2
@@ -10024,56 +9621,49 @@ define void @mscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32ZVE32F-NEXT: .LBB90_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB90_16
-; RV32ZVE32F-NEXT: .LBB90_8: # %else14
+; RV32ZVE32F-NEXT: # %bb.8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB90_9: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB90_2
+; RV32ZVE32F-NEXT: j .LBB90_1
; RV32ZVE32F-NEXT: .LBB90_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB90_3
+; RV32ZVE32F-NEXT: j .LBB90_2
; RV32ZVE32F-NEXT: .LBB90_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB90_4
+; RV32ZVE32F-NEXT: j .LBB90_3
; RV32ZVE32F-NEXT: .LBB90_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB90_5
+; RV32ZVE32F-NEXT: j .LBB90_4
; RV32ZVE32F-NEXT: .LBB90_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB90_6
+; RV32ZVE32F-NEXT: j .LBB90_5
; RV32ZVE32F-NEXT: .LBB90_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB90_7
+; RV32ZVE32F-NEXT: j .LBB90_6
; RV32ZVE32F-NEXT: .LBB90_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
-; RV32ZVE32F-NEXT: andi a0, a0, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB90_8
+; RV32ZVE32F-NEXT: j .LBB90_7
; RV32ZVE32F-NEXT: .LBB90_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -10094,7 +9684,7 @@ define void @mscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB90_9
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB90_1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB90_10
; RV64ZVE32F-NEXT: .LBB90_2: # %else2
@@ -10115,37 +9705,30 @@ define void @mscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB90_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB90_16
-; RV64ZVE32F-NEXT: .LBB90_8: # %else14
+; RV64ZVE32F-NEXT: # %bb.8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB90_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB90_2
+; RV64ZVE32F-NEXT: j .LBB90_1
; RV64ZVE32F-NEXT: .LBB90_10: # %cond.store1
; RV64ZVE32F-NEXT: fsd fa1, 0(t0)
-; RV64ZVE32F-NEXT: andi a0, a4, 4
-; RV64ZVE32F-NEXT: beqz a0, .LBB90_3
+; RV64ZVE32F-NEXT: j .LBB90_2
; RV64ZVE32F-NEXT: .LBB90_11: # %cond.store3
; RV64ZVE32F-NEXT: fsd fa2, 0(a7)
-; RV64ZVE32F-NEXT: andi a0, a4, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB90_4
+; RV64ZVE32F-NEXT: j .LBB90_3
; RV64ZVE32F-NEXT: .LBB90_12: # %cond.store5
; RV64ZVE32F-NEXT: fsd fa3, 0(a6)
-; RV64ZVE32F-NEXT: andi a0, a4, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB90_5
+; RV64ZVE32F-NEXT: j .LBB90_4
; RV64ZVE32F-NEXT: .LBB90_13: # %cond.store7
; RV64ZVE32F-NEXT: fsd fa4, 0(a5)
-; RV64ZVE32F-NEXT: andi a0, a4, 32
-; RV64ZVE32F-NEXT: beqz a0, .LBB90_6
+; RV64ZVE32F-NEXT: j .LBB90_5
; RV64ZVE32F-NEXT: .LBB90_14: # %cond.store9
; RV64ZVE32F-NEXT: fsd fa5, 0(a3)
-; RV64ZVE32F-NEXT: andi a0, a4, 64
-; RV64ZVE32F-NEXT: beqz a0, .LBB90_7
+; RV64ZVE32F-NEXT: j .LBB90_6
; RV64ZVE32F-NEXT: .LBB90_15: # %cond.store11
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: andi a0, a4, -128
-; RV64ZVE32F-NEXT: beqz a0, .LBB90_8
+; RV64ZVE32F-NEXT: j .LBB90_7
; RV64ZVE32F-NEXT: .LBB90_16: # %cond.store13
; RV64ZVE32F-NEXT: fsd fa7, 0(a1)
; RV64ZVE32F-NEXT: ret
@@ -10182,7 +9765,7 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8>
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB91_9
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB91_1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB91_10
; RV32ZVE32F-NEXT: .LBB91_2: # %else2
@@ -10203,55 +9786,48 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8>
; RV32ZVE32F-NEXT: .LBB91_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB91_16
-; RV32ZVE32F-NEXT: .LBB91_8: # %else14
+; RV32ZVE32F-NEXT: # %bb.8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB91_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB91_2
+; RV32ZVE32F-NEXT: j .LBB91_1
; RV32ZVE32F-NEXT: .LBB91_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB91_3
+; RV32ZVE32F-NEXT: j .LBB91_2
; RV32ZVE32F-NEXT: .LBB91_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB91_4
+; RV32ZVE32F-NEXT: j .LBB91_3
; RV32ZVE32F-NEXT: .LBB91_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB91_5
+; RV32ZVE32F-NEXT: j .LBB91_4
; RV32ZVE32F-NEXT: .LBB91_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB91_6
+; RV32ZVE32F-NEXT: j .LBB91_5
; RV32ZVE32F-NEXT: .LBB91_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB91_7
+; RV32ZVE32F-NEXT: j .LBB91_6
; RV32ZVE32F-NEXT: .LBB91_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB91_8
+; RV32ZVE32F-NEXT: j .LBB91_7
; RV32ZVE32F-NEXT: .LBB91_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -10287,7 +9863,7 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8>
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB91_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB91_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB91_13
; RV64ZVE32F-NEXT: .LBB91_6: # %else6
@@ -10296,7 +9872,7 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8>
; RV64ZVE32F-NEXT: .LBB91_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB91_9
-; RV64ZVE32F-NEXT: .LBB91_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
@@ -10306,41 +9882,36 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8>
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB91_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB91_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB91_16
-; RV64ZVE32F-NEXT: .LBB91_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB91_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB91_6
+; RV64ZVE32F-NEXT: j .LBB91_5
; RV64ZVE32F-NEXT: .LBB91_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB91_7
+; RV64ZVE32F-NEXT: j .LBB91_6
; RV64ZVE32F-NEXT: .LBB91_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB91_8
-; RV64ZVE32F-NEXT: j .LBB91_9
+; RV64ZVE32F-NEXT: j .LBB91_7
; RV64ZVE32F-NEXT: .LBB91_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB91_11
+; RV64ZVE32F-NEXT: j .LBB91_10
; RV64ZVE32F-NEXT: .LBB91_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -10382,7 +9953,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB92_9
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB92_1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB92_10
; RV32ZVE32F-NEXT: .LBB92_2: # %else2
@@ -10403,55 +9974,48 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV32ZVE32F-NEXT: .LBB92_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB92_16
-; RV32ZVE32F-NEXT: .LBB92_8: # %else14
+; RV32ZVE32F-NEXT: # %bb.8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB92_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB92_2
+; RV32ZVE32F-NEXT: j .LBB92_1
; RV32ZVE32F-NEXT: .LBB92_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB92_3
+; RV32ZVE32F-NEXT: j .LBB92_2
; RV32ZVE32F-NEXT: .LBB92_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB92_4
+; RV32ZVE32F-NEXT: j .LBB92_3
; RV32ZVE32F-NEXT: .LBB92_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB92_5
+; RV32ZVE32F-NEXT: j .LBB92_4
; RV32ZVE32F-NEXT: .LBB92_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB92_6
+; RV32ZVE32F-NEXT: j .LBB92_5
; RV32ZVE32F-NEXT: .LBB92_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB92_7
+; RV32ZVE32F-NEXT: j .LBB92_6
; RV32ZVE32F-NEXT: .LBB92_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB92_8
+; RV32ZVE32F-NEXT: j .LBB92_7
; RV32ZVE32F-NEXT: .LBB92_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -10487,7 +10051,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB92_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB92_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB92_13
; RV64ZVE32F-NEXT: .LBB92_6: # %else6
@@ -10496,7 +10060,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: .LBB92_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB92_9
-; RV64ZVE32F-NEXT: .LBB92_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
@@ -10506,41 +10070,36 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB92_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB92_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB92_16
-; RV64ZVE32F-NEXT: .LBB92_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB92_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_6
+; RV64ZVE32F-NEXT: j .LBB92_5
; RV64ZVE32F-NEXT: .LBB92_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_7
+; RV64ZVE32F-NEXT: j .LBB92_6
; RV64ZVE32F-NEXT: .LBB92_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB92_8
-; RV64ZVE32F-NEXT: j .LBB92_9
+; RV64ZVE32F-NEXT: j .LBB92_7
; RV64ZVE32F-NEXT: .LBB92_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB92_11
+; RV64ZVE32F-NEXT: j .LBB92_10
; RV64ZVE32F-NEXT: .LBB92_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -10584,7 +10143,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB93_9
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB93_1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB93_10
; RV32ZVE32F-NEXT: .LBB93_2: # %else2
@@ -10605,55 +10164,48 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV32ZVE32F-NEXT: .LBB93_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB93_16
-; RV32ZVE32F-NEXT: .LBB93_8: # %else14
+; RV32ZVE32F-NEXT: # %bb.8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB93_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB93_2
+; RV32ZVE32F-NEXT: j .LBB93_1
; RV32ZVE32F-NEXT: .LBB93_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB93_3
+; RV32ZVE32F-NEXT: j .LBB93_2
; RV32ZVE32F-NEXT: .LBB93_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB93_4
+; RV32ZVE32F-NEXT: j .LBB93_3
; RV32ZVE32F-NEXT: .LBB93_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB93_5
+; RV32ZVE32F-NEXT: j .LBB93_4
; RV32ZVE32F-NEXT: .LBB93_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB93_6
+; RV32ZVE32F-NEXT: j .LBB93_5
; RV32ZVE32F-NEXT: .LBB93_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB93_7
+; RV32ZVE32F-NEXT: j .LBB93_6
; RV32ZVE32F-NEXT: .LBB93_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB93_8
+; RV32ZVE32F-NEXT: j .LBB93_7
; RV32ZVE32F-NEXT: .LBB93_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -10691,7 +10243,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB93_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB93_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB93_13
; RV64ZVE32F-NEXT: .LBB93_6: # %else6
@@ -10700,7 +10252,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: .LBB93_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB93_9
-; RV64ZVE32F-NEXT: .LBB93_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
@@ -10711,10 +10263,10 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB93_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB93_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB93_16
-; RV64ZVE32F-NEXT: .LBB93_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB93_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -10722,8 +10274,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB93_6
+; RV64ZVE32F-NEXT: j .LBB93_5
; RV64ZVE32F-NEXT: .LBB93_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -10731,25 +10282,21 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB93_7
+; RV64ZVE32F-NEXT: j .LBB93_6
; RV64ZVE32F-NEXT: .LBB93_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB93_8
-; RV64ZVE32F-NEXT: j .LBB93_9
+; RV64ZVE32F-NEXT: j .LBB93_7
; RV64ZVE32F-NEXT: .LBB93_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB93_11
+; RV64ZVE32F-NEXT: j .LBB93_10
; RV64ZVE32F-NEXT: .LBB93_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -10793,7 +10340,7 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccus.vx v10, a1, v8
; RV32ZVE32F-NEXT: bnez a2, .LBB94_9
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB94_1: # %else
; RV32ZVE32F-NEXT: andi a1, a0, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB94_10
; RV32ZVE32F-NEXT: .LBB94_2: # %else2
@@ -10814,56 +10361,49 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16
; RV32ZVE32F-NEXT: .LBB94_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB94_16
-; RV32ZVE32F-NEXT: .LBB94_8: # %else14
+; RV32ZVE32F-NEXT: # %bb.8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB94_9: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB94_2
+; RV32ZVE32F-NEXT: j .LBB94_1
; RV32ZVE32F-NEXT: .LBB94_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB94_3
+; RV32ZVE32F-NEXT: j .LBB94_2
; RV32ZVE32F-NEXT: .LBB94_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB94_4
+; RV32ZVE32F-NEXT: j .LBB94_3
; RV32ZVE32F-NEXT: .LBB94_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB94_5
+; RV32ZVE32F-NEXT: j .LBB94_4
; RV32ZVE32F-NEXT: .LBB94_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB94_6
+; RV32ZVE32F-NEXT: j .LBB94_5
; RV32ZVE32F-NEXT: .LBB94_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB94_7
+; RV32ZVE32F-NEXT: j .LBB94_6
; RV32ZVE32F-NEXT: .LBB94_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
-; RV32ZVE32F-NEXT: andi a0, a0, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB94_8
+; RV32ZVE32F-NEXT: j .LBB94_7
; RV32ZVE32F-NEXT: .LBB94_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
@@ -10900,7 +10440,7 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB94_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB94_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB94_13
; RV64ZVE32F-NEXT: .LBB94_6: # %else6
@@ -10909,7 +10449,7 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16
; RV64ZVE32F-NEXT: .LBB94_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB94_9
-; RV64ZVE32F-NEXT: .LBB94_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
@@ -10919,41 +10459,36 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB94_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB94_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB94_16
-; RV64ZVE32F-NEXT: .LBB94_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB94_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB94_6
+; RV64ZVE32F-NEXT: j .LBB94_5
; RV64ZVE32F-NEXT: .LBB94_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB94_7
+; RV64ZVE32F-NEXT: j .LBB94_6
; RV64ZVE32F-NEXT: .LBB94_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB94_8
-; RV64ZVE32F-NEXT: j .LBB94_9
+; RV64ZVE32F-NEXT: j .LBB94_7
; RV64ZVE32F-NEXT: .LBB94_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB94_11
+; RV64ZVE32F-NEXT: j .LBB94_10
; RV64ZVE32F-NEXT: .LBB94_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -10995,7 +10530,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccus.vx v10, a1, v8
; RV32ZVE32F-NEXT: bnez a2, .LBB95_9
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB95_1: # %else
; RV32ZVE32F-NEXT: andi a1, a0, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB95_10
; RV32ZVE32F-NEXT: .LBB95_2: # %else2
@@ -11016,56 +10551,49 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: .LBB95_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB95_16
-; RV32ZVE32F-NEXT: .LBB95_8: # %else14
+; RV32ZVE32F-NEXT: # %bb.8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB95_9: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB95_2
+; RV32ZVE32F-NEXT: j .LBB95_1
; RV32ZVE32F-NEXT: .LBB95_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB95_3
+; RV32ZVE32F-NEXT: j .LBB95_2
; RV32ZVE32F-NEXT: .LBB95_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB95_4
+; RV32ZVE32F-NEXT: j .LBB95_3
; RV32ZVE32F-NEXT: .LBB95_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB95_5
+; RV32ZVE32F-NEXT: j .LBB95_4
; RV32ZVE32F-NEXT: .LBB95_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB95_6
+; RV32ZVE32F-NEXT: j .LBB95_5
; RV32ZVE32F-NEXT: .LBB95_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB95_7
+; RV32ZVE32F-NEXT: j .LBB95_6
; RV32ZVE32F-NEXT: .LBB95_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
-; RV32ZVE32F-NEXT: andi a0, a0, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB95_8
+; RV32ZVE32F-NEXT: j .LBB95_7
; RV32ZVE32F-NEXT: .LBB95_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
@@ -11102,7 +10630,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB95_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB95_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB95_13
; RV64ZVE32F-NEXT: .LBB95_6: # %else6
@@ -11111,7 +10639,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: .LBB95_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB95_9
-; RV64ZVE32F-NEXT: .LBB95_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
@@ -11121,41 +10649,36 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB95_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB95_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB95_16
-; RV64ZVE32F-NEXT: .LBB95_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB95_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB95_6
+; RV64ZVE32F-NEXT: j .LBB95_5
; RV64ZVE32F-NEXT: .LBB95_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB95_7
+; RV64ZVE32F-NEXT: j .LBB95_6
; RV64ZVE32F-NEXT: .LBB95_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB95_8
-; RV64ZVE32F-NEXT: j .LBB95_9
+; RV64ZVE32F-NEXT: j .LBB95_7
; RV64ZVE32F-NEXT: .LBB95_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB95_11
+; RV64ZVE32F-NEXT: j .LBB95_10
; RV64ZVE32F-NEXT: .LBB95_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -11199,7 +10722,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccu.vx v10, a1, v8
; RV32ZVE32F-NEXT: bnez a2, .LBB96_9
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB96_1: # %else
; RV32ZVE32F-NEXT: andi a1, a0, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB96_10
; RV32ZVE32F-NEXT: .LBB96_2: # %else2
@@ -11220,56 +10743,49 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: .LBB96_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB96_16
-; RV32ZVE32F-NEXT: .LBB96_8: # %else14
+; RV32ZVE32F-NEXT: # %bb.8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB96_9: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB96_2
+; RV32ZVE32F-NEXT: j .LBB96_1
; RV32ZVE32F-NEXT: .LBB96_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB96_3
+; RV32ZVE32F-NEXT: j .LBB96_2
; RV32ZVE32F-NEXT: .LBB96_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB96_4
+; RV32ZVE32F-NEXT: j .LBB96_3
; RV32ZVE32F-NEXT: .LBB96_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB96_5
+; RV32ZVE32F-NEXT: j .LBB96_4
; RV32ZVE32F-NEXT: .LBB96_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB96_6
+; RV32ZVE32F-NEXT: j .LBB96_5
; RV32ZVE32F-NEXT: .LBB96_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
-; RV32ZVE32F-NEXT: andi a1, a0, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB96_7
+; RV32ZVE32F-NEXT: j .LBB96_6
; RV32ZVE32F-NEXT: .LBB96_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
-; RV32ZVE32F-NEXT: andi a0, a0, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB96_8
+; RV32ZVE32F-NEXT: j .LBB96_7
; RV32ZVE32F-NEXT: .LBB96_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
@@ -11308,7 +10824,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB96_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB96_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB96_13
; RV64ZVE32F-NEXT: .LBB96_6: # %else6
@@ -11317,7 +10833,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: .LBB96_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB96_9
-; RV64ZVE32F-NEXT: .LBB96_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 48
@@ -11328,10 +10844,10 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB96_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB96_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB96_16
-; RV64ZVE32F-NEXT: .LBB96_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB96_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -11339,8 +10855,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: srli a2, a2, 45
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB96_6
+; RV64ZVE32F-NEXT: j .LBB96_5
; RV64ZVE32F-NEXT: .LBB96_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -11348,25 +10863,21 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: srli a2, a2, 45
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB96_7
+; RV64ZVE32F-NEXT: j .LBB96_6
; RV64ZVE32F-NEXT: .LBB96_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 45
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB96_8
-; RV64ZVE32F-NEXT: j .LBB96_9
+; RV64ZVE32F-NEXT: j .LBB96_7
; RV64ZVE32F-NEXT: .LBB96_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 45
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB96_11
+; RV64ZVE32F-NEXT: j .LBB96_10
; RV64ZVE32F-NEXT: .LBB96_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -11409,7 +10920,7 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB97_9
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB97_1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB97_10
; RV32ZVE32F-NEXT: .LBB97_2: # %else2
@@ -11430,55 +10941,48 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32
; RV32ZVE32F-NEXT: .LBB97_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB97_16
-; RV32ZVE32F-NEXT: .LBB97_8: # %else14
+; RV32ZVE32F-NEXT: # %bb.8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB97_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB97_2
+; RV32ZVE32F-NEXT: j .LBB97_1
; RV32ZVE32F-NEXT: .LBB97_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB97_3
+; RV32ZVE32F-NEXT: j .LBB97_2
; RV32ZVE32F-NEXT: .LBB97_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB97_4
+; RV32ZVE32F-NEXT: j .LBB97_3
; RV32ZVE32F-NEXT: .LBB97_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB97_5
+; RV32ZVE32F-NEXT: j .LBB97_4
; RV32ZVE32F-NEXT: .LBB97_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB97_6
+; RV32ZVE32F-NEXT: j .LBB97_5
; RV32ZVE32F-NEXT: .LBB97_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB97_7
+; RV32ZVE32F-NEXT: j .LBB97_6
; RV32ZVE32F-NEXT: .LBB97_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB97_8
+; RV32ZVE32F-NEXT: j .LBB97_7
; RV32ZVE32F-NEXT: .LBB97_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -11515,7 +11019,7 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB97_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB97_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB97_13
; RV64ZVE32F-NEXT: .LBB97_6: # %else6
@@ -11524,7 +11028,7 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32
; RV64ZVE32F-NEXT: .LBB97_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB97_9
-; RV64ZVE32F-NEXT: .LBB97_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
@@ -11534,41 +11038,36 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB97_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB97_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB97_16
-; RV64ZVE32F-NEXT: .LBB97_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB97_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB97_6
+; RV64ZVE32F-NEXT: j .LBB97_5
; RV64ZVE32F-NEXT: .LBB97_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB97_7
+; RV64ZVE32F-NEXT: j .LBB97_6
; RV64ZVE32F-NEXT: .LBB97_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB97_8
-; RV64ZVE32F-NEXT: j .LBB97_9
+; RV64ZVE32F-NEXT: j .LBB97_7
; RV64ZVE32F-NEXT: .LBB97_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB97_11
+; RV64ZVE32F-NEXT: j .LBB97_10
; RV64ZVE32F-NEXT: .LBB97_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -11609,7 +11108,7 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB98_9
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB98_1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB98_10
; RV32ZVE32F-NEXT: .LBB98_2: # %else2
@@ -11630,55 +11129,48 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: .LBB98_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB98_16
-; RV32ZVE32F-NEXT: .LBB98_8: # %else14
+; RV32ZVE32F-NEXT: # %bb.8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB98_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB98_2
+; RV32ZVE32F-NEXT: j .LBB98_1
; RV32ZVE32F-NEXT: .LBB98_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB98_3
+; RV32ZVE32F-NEXT: j .LBB98_2
; RV32ZVE32F-NEXT: .LBB98_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB98_4
+; RV32ZVE32F-NEXT: j .LBB98_3
; RV32ZVE32F-NEXT: .LBB98_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB98_5
+; RV32ZVE32F-NEXT: j .LBB98_4
; RV32ZVE32F-NEXT: .LBB98_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB98_6
+; RV32ZVE32F-NEXT: j .LBB98_5
; RV32ZVE32F-NEXT: .LBB98_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB98_7
+; RV32ZVE32F-NEXT: j .LBB98_6
; RV32ZVE32F-NEXT: .LBB98_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB98_8
+; RV32ZVE32F-NEXT: j .LBB98_7
; RV32ZVE32F-NEXT: .LBB98_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -11715,7 +11207,7 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB98_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB98_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB98_13
; RV64ZVE32F-NEXT: .LBB98_6: # %else6
@@ -11724,7 +11216,7 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: .LBB98_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB98_9
-; RV64ZVE32F-NEXT: .LBB98_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
@@ -11734,41 +11226,36 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB98_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB98_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB98_16
-; RV64ZVE32F-NEXT: .LBB98_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB98_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB98_6
+; RV64ZVE32F-NEXT: j .LBB98_5
; RV64ZVE32F-NEXT: .LBB98_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB98_7
+; RV64ZVE32F-NEXT: j .LBB98_6
; RV64ZVE32F-NEXT: .LBB98_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB98_8
-; RV64ZVE32F-NEXT: j .LBB98_9
+; RV64ZVE32F-NEXT: j .LBB98_7
; RV64ZVE32F-NEXT: .LBB98_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB98_11
+; RV64ZVE32F-NEXT: j .LBB98_10
; RV64ZVE32F-NEXT: .LBB98_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -11810,7 +11297,7 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB99_9
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB99_1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB99_10
; RV32ZVE32F-NEXT: .LBB99_2: # %else2
@@ -11831,55 +11318,48 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: .LBB99_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB99_16
-; RV32ZVE32F-NEXT: .LBB99_8: # %else14
+; RV32ZVE32F-NEXT: # %bb.8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB99_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB99_2
+; RV32ZVE32F-NEXT: j .LBB99_1
; RV32ZVE32F-NEXT: .LBB99_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB99_3
+; RV32ZVE32F-NEXT: j .LBB99_2
; RV32ZVE32F-NEXT: .LBB99_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB99_4
+; RV32ZVE32F-NEXT: j .LBB99_3
; RV32ZVE32F-NEXT: .LBB99_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB99_5
+; RV32ZVE32F-NEXT: j .LBB99_4
; RV32ZVE32F-NEXT: .LBB99_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB99_6
+; RV32ZVE32F-NEXT: j .LBB99_5
; RV32ZVE32F-NEXT: .LBB99_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB99_7
+; RV32ZVE32F-NEXT: j .LBB99_6
; RV32ZVE32F-NEXT: .LBB99_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB99_8
+; RV32ZVE32F-NEXT: j .LBB99_7
; RV32ZVE32F-NEXT: .LBB99_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -11918,7 +11398,7 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB99_12
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB99_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB99_13
; RV64ZVE32F-NEXT: .LBB99_6: # %else6
@@ -11927,7 +11407,7 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: .LBB99_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB99_9
-; RV64ZVE32F-NEXT: .LBB99_8: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
@@ -11938,10 +11418,10 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB99_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-NEXT: .LBB99_10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB99_16
-; RV64ZVE32F-NEXT: .LBB99_11: # %else14
+; RV64ZVE32F-NEXT: # %bb.11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB99_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -11949,8 +11429,7 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB99_6
+; RV64ZVE32F-NEXT: j .LBB99_5
; RV64ZVE32F-NEXT: .LBB99_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -11958,25 +11437,21 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB99_7
+; RV64ZVE32F-NEXT: j .LBB99_6
; RV64ZVE32F-NEXT: .LBB99_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB99_8
-; RV64ZVE32F-NEXT: j .LBB99_9
+; RV64ZVE32F-NEXT: j .LBB99_7
; RV64ZVE32F-NEXT: .LBB99_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB99_11
+; RV64ZVE32F-NEXT: j .LBB99_10
; RV64ZVE32F-NEXT: .LBB99_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -12034,7 +11509,7 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB100_9
-; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: .LBB100_1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB100_10
; RV32ZVE32F-NEXT: .LBB100_2: # %else2
@@ -12055,55 +11530,48 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx
; RV32ZVE32F-NEXT: .LBB100_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB100_16
-; RV32ZVE32F-NEXT: .LBB100_8: # %else14
+; RV32ZVE32F-NEXT: # %bb.8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB100_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB100_2
+; RV32ZVE32F-NEXT: j .LBB100_1
; RV32ZVE32F-NEXT: .LBB100_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB100_3
+; RV32ZVE32F-NEXT: j .LBB100_2
; RV32ZVE32F-NEXT: .LBB100_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB100_4
+; RV32ZVE32F-NEXT: j .LBB100_3
; RV32ZVE32F-NEXT: .LBB100_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB100_5
+; RV32ZVE32F-NEXT: j .LBB100_4
; RV32ZVE32F-NEXT: .LBB100_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB100_6
+; RV32ZVE32F-NEXT: j .LBB100_5
; RV32ZVE32F-NEXT: .LBB100_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB100_7
+; RV32ZVE32F-NEXT: j .LBB100_6
; RV32ZVE32F-NEXT: .LBB100_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB100_8
+; RV32ZVE32F-NEXT: j .LBB100_7
; RV32ZVE32F-NEXT: .LBB100_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -12124,7 +11592,7 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi t2, a3, 1
; RV64ZVE32F-NEXT: bnez t2, .LBB100_9
-; RV64ZVE32F-NEXT: # %bb.1: # %else
+; RV64ZVE32F-NEXT: .LBB100_1: # %else
; RV64ZVE32F-NEXT: andi a1, a3, 2
; RV64ZVE32F-NEXT: bnez a1, .LBB100_10
; RV64ZVE32F-NEXT: .LBB100_2: # %else2
@@ -12145,51 +11613,44 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx
; RV64ZVE32F-NEXT: .LBB100_7: # %else12
; RV64ZVE32F-NEXT: andi a1, a3, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB100_16
-; RV64ZVE32F-NEXT: .LBB100_8: # %else14
+; RV64ZVE32F-NEXT: # %bb.8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB100_9: # %cond.store
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: slli a1, a1, 3
; RV64ZVE32F-NEXT: add a1, a0, a1
; RV64ZVE32F-NEXT: fsd fa0, 0(a1)
-; RV64ZVE32F-NEXT: andi a1, a3, 2
-; RV64ZVE32F-NEXT: beqz a1, .LBB100_2
+; RV64ZVE32F-NEXT: j .LBB100_1
; RV64ZVE32F-NEXT: .LBB100_10: # %cond.store1
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a0, t1
; RV64ZVE32F-NEXT: fsd fa1, 0(t1)
-; RV64ZVE32F-NEXT: andi a1, a3, 4
-; RV64ZVE32F-NEXT: beqz a1, .LBB100_3
+; RV64ZVE32F-NEXT: j .LBB100_2
; RV64ZVE32F-NEXT: .LBB100_11: # %cond.store3
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a0, t0
; RV64ZVE32F-NEXT: fsd fa2, 0(t0)
-; RV64ZVE32F-NEXT: andi a1, a3, 8
-; RV64ZVE32F-NEXT: beqz a1, .LBB100_4
+; RV64ZVE32F-NEXT: j .LBB100_3
; RV64ZVE32F-NEXT: .LBB100_12: # %cond.store5
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a0, a7
; RV64ZVE32F-NEXT: fsd fa3, 0(a7)
-; RV64ZVE32F-NEXT: andi a1, a3, 16
-; RV64ZVE32F-NEXT: beqz a1, .LBB100_5
+; RV64ZVE32F-NEXT: j .LBB100_4
; RV64ZVE32F-NEXT: .LBB100_13: # %cond.store7
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a0, a6
; RV64ZVE32F-NEXT: fsd fa4, 0(a6)
-; RV64ZVE32F-NEXT: andi a1, a3, 32
-; RV64ZVE32F-NEXT: beqz a1, .LBB100_6
+; RV64ZVE32F-NEXT: j .LBB100_5
; RV64ZVE32F-NEXT: .LBB100_14: # %cond.store9
; RV64ZVE32F-NEXT: slli a5, a5, 3
; RV64ZVE32F-NEXT: add a5, a0, a5
; RV64ZVE32F-NEXT: fsd fa5, 0(a5)
-; RV64ZVE32F-NEXT: andi a1, a3, 64
-; RV64ZVE32F-NEXT: beqz a1, .LBB100_7
+; RV64ZVE32F-NEXT: j .LBB100_6
; RV64ZVE32F-NEXT: .LBB100_15: # %cond.store11
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a0, a4
; RV64ZVE32F-NEXT: fsd fa6, 0(a4)
-; RV64ZVE32F-NEXT: andi a1, a3, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB100_8
+; RV64ZVE32F-NEXT: j .LBB100_7
; RV64ZVE32F-NEXT: .LBB100_16: # %cond.store13
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a0, a0, a2
@@ -12248,13 +11709,13 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB101_25
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB101_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB101_26
; RV64ZVE32F-NEXT: .LBB101_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB101_8
-; RV64ZVE32F-NEXT: .LBB101_7: # %cond.store7
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -12278,7 +11739,7 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB101_27
-; RV64ZVE32F-NEXT: # %bb.11: # %else12
+; RV64ZVE32F-NEXT: .LBB101_11: # %else12
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: bnez a2, .LBB101_28
; RV64ZVE32F-NEXT: .LBB101_12: # %else14
@@ -12287,7 +11748,7 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: .LBB101_13: # %else16
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: beqz a2, .LBB101_15
-; RV64ZVE32F-NEXT: .LBB101_14: # %cond.store17
+; RV64ZVE32F-NEXT: # %bb.14: # %cond.store17
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -12302,7 +11763,7 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB101_30
-; RV64ZVE32F-NEXT: # %bb.16: # %else20
+; RV64ZVE32F-NEXT: .LBB101_16: # %else20
; RV64ZVE32F-NEXT: slli a2, a1, 52
; RV64ZVE32F-NEXT: bltz a2, .LBB101_31
; RV64ZVE32F-NEXT: .LBB101_17: # %else22
@@ -12311,7 +11772,7 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: .LBB101_18: # %else24
; RV64ZVE32F-NEXT: slli a2, a1, 50
; RV64ZVE32F-NEXT: bgez a2, .LBB101_20
-; RV64ZVE32F-NEXT: .LBB101_19: # %cond.store25
+; RV64ZVE32F-NEXT: # %bb.19: # %cond.store25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -12350,8 +11811,7 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB101_6
+; RV64ZVE32F-NEXT: j .LBB101_5
; RV64ZVE32F-NEXT: .LBB101_26: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
@@ -12360,17 +11820,14 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB101_7
-; RV64ZVE32F-NEXT: j .LBB101_8
+; RV64ZVE32F-NEXT: j .LBB101_6
; RV64ZVE32F-NEXT: .LBB101_27: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 6
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 128
-; RV64ZVE32F-NEXT: beqz a2, .LBB101_12
+; RV64ZVE32F-NEXT: j .LBB101_11
; RV64ZVE32F-NEXT: .LBB101_28: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -12379,25 +11836,21 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 7
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 256
-; RV64ZVE32F-NEXT: beqz a2, .LBB101_13
+; RV64ZVE32F-NEXT: j .LBB101_12
; RV64ZVE32F-NEXT: .LBB101_29: # %cond.store15
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 8
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 512
-; RV64ZVE32F-NEXT: bnez a2, .LBB101_14
-; RV64ZVE32F-NEXT: j .LBB101_15
+; RV64ZVE32F-NEXT: j .LBB101_13
; RV64ZVE32F-NEXT: .LBB101_30: # %cond.store19
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 10
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
-; RV64ZVE32F-NEXT: slli a2, a1, 52
-; RV64ZVE32F-NEXT: bgez a2, .LBB101_17
+; RV64ZVE32F-NEXT: j .LBB101_16
; RV64ZVE32F-NEXT: .LBB101_31: # %cond.store21
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -12406,17 +11859,14 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 11
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
-; RV64ZVE32F-NEXT: slli a2, a1, 51
-; RV64ZVE32F-NEXT: bgez a2, .LBB101_18
+; RV64ZVE32F-NEXT: j .LBB101_17
; RV64ZVE32F-NEXT: .LBB101_32: # %cond.store23
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 12
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
-; RV64ZVE32F-NEXT: slli a2, a1, 50
-; RV64ZVE32F-NEXT: bltz a2, .LBB101_19
-; RV64ZVE32F-NEXT: j .LBB101_20
+; RV64ZVE32F-NEXT: j .LBB101_18
%ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %idxs
call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %val, <16 x ptr> %ptrs, i32 1, <16 x i1> %m)
ret void
@@ -12480,13 +11930,13 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB102_49
-; RV64ZVE32F-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-NEXT: .LBB102_5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB102_50
; RV64ZVE32F-NEXT: .LBB102_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB102_8
-; RV64ZVE32F-NEXT: .LBB102_7: # %cond.store7
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -12510,7 +11960,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB102_51
-; RV64ZVE32F-NEXT: # %bb.11: # %else12
+; RV64ZVE32F-NEXT: .LBB102_11: # %else12
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: bnez a2, .LBB102_52
; RV64ZVE32F-NEXT: .LBB102_12: # %else14
@@ -12519,7 +11969,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: .LBB102_13: # %else16
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: beqz a2, .LBB102_15
-; RV64ZVE32F-NEXT: .LBB102_14: # %cond.store17
+; RV64ZVE32F-NEXT: # %bb.14: # %cond.store17
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
@@ -12578,7 +12028,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v13, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB102_54
-; RV64ZVE32F-NEXT: # %bb.24: # %else28
+; RV64ZVE32F-NEXT: .LBB102_24: # %else28
; RV64ZVE32F-NEXT: slli a2, a1, 48
; RV64ZVE32F-NEXT: bltz a2, .LBB102_55
; RV64ZVE32F-NEXT: .LBB102_25: # %else30
@@ -12587,7 +12037,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: .LBB102_26: # %else32
; RV64ZVE32F-NEXT: slli a2, a1, 46
; RV64ZVE32F-NEXT: bgez a2, .LBB102_28
-; RV64ZVE32F-NEXT: .LBB102_27: # %cond.store33
+; RV64ZVE32F-NEXT: # %bb.27: # %cond.store33
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -12603,13 +12053,13 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB102_57
-; RV64ZVE32F-NEXT: # %bb.29: # %else36
+; RV64ZVE32F-NEXT: .LBB102_29: # %else36
; RV64ZVE32F-NEXT: slli a2, a1, 44
; RV64ZVE32F-NEXT: bltz a2, .LBB102_58
; RV64ZVE32F-NEXT: .LBB102_30: # %else38
; RV64ZVE32F-NEXT: slli a2, a1, 43
; RV64ZVE32F-NEXT: bgez a2, .LBB102_32
-; RV64ZVE32F-NEXT: .LBB102_31: # %cond.store39
+; RV64ZVE32F-NEXT: # %bb.31: # %cond.store39
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -12635,7 +12085,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB102_59
-; RV64ZVE32F-NEXT: # %bb.35: # %else44
+; RV64ZVE32F-NEXT: .LBB102_35: # %else44
; RV64ZVE32F-NEXT: slli a2, a1, 40
; RV64ZVE32F-NEXT: bltz a2, .LBB102_60
; RV64ZVE32F-NEXT: .LBB102_36: # %else46
@@ -12644,7 +12094,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: .LBB102_37: # %else48
; RV64ZVE32F-NEXT: slli a2, a1, 38
; RV64ZVE32F-NEXT: bgez a2, .LBB102_39
-; RV64ZVE32F-NEXT: .LBB102_38: # %cond.store49
+; RV64ZVE32F-NEXT: # %bb.38: # %cond.store49
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -12660,7 +12110,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB102_62
-; RV64ZVE32F-NEXT: # %bb.40: # %else52
+; RV64ZVE32F-NEXT: .LBB102_40: # %else52
; RV64ZVE32F-NEXT: slli a2, a1, 36
; RV64ZVE32F-NEXT: bltz a2, .LBB102_63
; RV64ZVE32F-NEXT: .LBB102_41: # %else54
@@ -12669,7 +12119,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: .LBB102_42: # %else56
; RV64ZVE32F-NEXT: slli a2, a1, 34
; RV64ZVE32F-NEXT: bgez a2, .LBB102_44
-; RV64ZVE32F-NEXT: .LBB102_43: # %cond.store57
+; RV64ZVE32F-NEXT: # %bb.43: # %cond.store57
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -12711,8 +12161,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB102_6
+; RV64ZVE32F-NEXT: j .LBB102_5
; RV64ZVE32F-NEXT: .LBB102_50: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
@@ -12721,17 +12170,14 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB102_7
-; RV64ZVE32F-NEXT: j .LBB102_8
+; RV64ZVE32F-NEXT: j .LBB102_6
; RV64ZVE32F-NEXT: .LBB102_51: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 6
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 128
-; RV64ZVE32F-NEXT: beqz a2, .LBB102_12
+; RV64ZVE32F-NEXT: j .LBB102_11
; RV64ZVE32F-NEXT: .LBB102_52: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
@@ -12740,25 +12186,21 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 7
; RV64ZVE32F-NEXT: vse8.v v13, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 256
-; RV64ZVE32F-NEXT: beqz a2, .LBB102_13
+; RV64ZVE32F-NEXT: j .LBB102_12
; RV64ZVE32F-NEXT: .LBB102_53: # %cond.store15
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 8
; RV64ZVE32F-NEXT: vse8.v v13, (a2)
-; RV64ZVE32F-NEXT: andi a2, a1, 512
-; RV64ZVE32F-NEXT: bnez a2, .LBB102_14
-; RV64ZVE32F-NEXT: j .LBB102_15
+; RV64ZVE32F-NEXT: j .LBB102_13
; RV64ZVE32F-NEXT: .LBB102_54: # %cond.store27
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 14
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: slli a2, a1, 48
-; RV64ZVE32F-NEXT: bgez a2, .LBB102_25
+; RV64ZVE32F-NEXT: j .LBB102_24
; RV64ZVE32F-NEXT: .LBB102_55: # %cond.store29
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
@@ -12767,8 +12209,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 15
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
-; RV64ZVE32F-NEXT: slli a2, a1, 47
-; RV64ZVE32F-NEXT: bgez a2, .LBB102_26
+; RV64ZVE32F-NEXT: j .LBB102_25
; RV64ZVE32F-NEXT: .LBB102_56: # %cond.store31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -12776,9 +12217,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: slli a2, a1, 46
-; RV64ZVE32F-NEXT: bltz a2, .LBB102_27
-; RV64ZVE32F-NEXT: j .LBB102_28
+; RV64ZVE32F-NEXT: j .LBB102_26
; RV64ZVE32F-NEXT: .LBB102_57: # %cond.store35
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -12786,8 +12225,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 18
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
-; RV64ZVE32F-NEXT: slli a2, a1, 44
-; RV64ZVE32F-NEXT: bgez a2, .LBB102_30
+; RV64ZVE32F-NEXT: j .LBB102_29
; RV64ZVE32F-NEXT: .LBB102_58: # %cond.store37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
@@ -12797,9 +12235,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: slli a2, a1, 43
-; RV64ZVE32F-NEXT: bltz a2, .LBB102_31
-; RV64ZVE32F-NEXT: j .LBB102_32
+; RV64ZVE32F-NEXT: j .LBB102_30
; RV64ZVE32F-NEXT: .LBB102_59: # %cond.store43
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -12807,8 +12243,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 22
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: slli a2, a1, 40
-; RV64ZVE32F-NEXT: bgez a2, .LBB102_36
+; RV64ZVE32F-NEXT: j .LBB102_35
; RV64ZVE32F-NEXT: .LBB102_60: # %cond.store45
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
@@ -12818,8 +12253,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 23
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: slli a2, a1, 39
-; RV64ZVE32F-NEXT: bgez a2, .LBB102_37
+; RV64ZVE32F-NEXT: j .LBB102_36
; RV64ZVE32F-NEXT: .LBB102_61: # %cond.store47
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -12827,9 +12261,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 24
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: slli a2, a1, 38
-; RV64ZVE32F-NEXT: bltz a2, .LBB102_38
-; RV64ZVE32F-NEXT: j .LBB102_39
+; RV64ZVE32F-NEXT: j .LBB102_37
; RV64ZVE32F-NEXT: .LBB102_62: # %cond.store51
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -12837,8 +12269,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 26
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: slli a2, a1, 36
-; RV64ZVE32F-NEXT: bgez a2, .LBB102_41
+; RV64ZVE32F-NEXT: j .LBB102_40
; RV64ZVE32F-NEXT: .LBB102_63: # %cond.store53
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -12848,8 +12279,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 27
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: slli a2, a1, 35
-; RV64ZVE32F-NEXT: bgez a2, .LBB102_42
+; RV64ZVE32F-NEXT: j .LBB102_41
; RV64ZVE32F-NEXT: .LBB102_64: # %cond.store55
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -12857,9 +12287,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 28
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: slli a2, a1, 34
-; RV64ZVE32F-NEXT: bltz a2, .LBB102_43
-; RV64ZVE32F-NEXT: j .LBB102_44
+; RV64ZVE32F-NEXT: j .LBB102_42
%ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> %val, <32 x ptr> %ptrs, i32 1, <32 x i1> %m)
ret void
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
index 29fbb8acc3358..4dd07fdac72b0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -253,7 +253,7 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV32-SLOW-NEXT: vmv.x.s a0, v0
; RV32-SLOW-NEXT: andi a1, a0, 1
; RV32-SLOW-NEXT: bnez a1, .LBB6_5
-; RV32-SLOW-NEXT: # %bb.1: # %else
+; RV32-SLOW-NEXT: .LBB6_1: # %else
; RV32-SLOW-NEXT: andi a1, a0, 2
; RV32-SLOW-NEXT: bnez a1, .LBB6_6
; RV32-SLOW-NEXT: .LBB6_2: # %else2
@@ -262,7 +262,7 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV32-SLOW-NEXT: .LBB6_3: # %else4
; RV32-SLOW-NEXT: andi a0, a0, 8
; RV32-SLOW-NEXT: bnez a0, .LBB6_8
-; RV32-SLOW-NEXT: .LBB6_4: # %else6
+; RV32-SLOW-NEXT: # %bb.4: # %else6
; RV32-SLOW-NEXT: ret
; RV32-SLOW-NEXT: .LBB6_5: # %cond.store
; RV32-SLOW-NEXT: vsetvli zero, zero, e16, m2, ta, ma
@@ -272,8 +272,7 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV32-SLOW-NEXT: srli a3, a1, 8
; RV32-SLOW-NEXT: sb a1, 0(a2)
; RV32-SLOW-NEXT: sb a3, 1(a2)
-; RV32-SLOW-NEXT: andi a1, a0, 2
-; RV32-SLOW-NEXT: beqz a1, .LBB6_2
+; RV32-SLOW-NEXT: j .LBB6_1
; RV32-SLOW-NEXT: .LBB6_6: # %cond.store1
; RV32-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV32-SLOW-NEXT: vslidedown.vi v10, v8, 1
@@ -284,8 +283,7 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV32-SLOW-NEXT: srli a3, a1, 8
; RV32-SLOW-NEXT: sb a1, 0(a2)
; RV32-SLOW-NEXT: sb a3, 1(a2)
-; RV32-SLOW-NEXT: andi a1, a0, 4
-; RV32-SLOW-NEXT: beqz a1, .LBB6_3
+; RV32-SLOW-NEXT: j .LBB6_2
; RV32-SLOW-NEXT: .LBB6_7: # %cond.store3
; RV32-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV32-SLOW-NEXT: vslidedown.vi v10, v8, 2
@@ -296,8 +294,7 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV32-SLOW-NEXT: srli a3, a1, 8
; RV32-SLOW-NEXT: sb a1, 0(a2)
; RV32-SLOW-NEXT: sb a3, 1(a2)
-; RV32-SLOW-NEXT: andi a0, a0, 8
-; RV32-SLOW-NEXT: beqz a0, .LBB6_4
+; RV32-SLOW-NEXT: j .LBB6_3
; RV32-SLOW-NEXT: .LBB6_8: # %cond.store5
; RV32-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV32-SLOW-NEXT: vslidedown.vi v8, v8, 3
@@ -318,7 +315,7 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV64-SLOW-NEXT: vmv.x.s a0, v0
; RV64-SLOW-NEXT: andi a1, a0, 1
; RV64-SLOW-NEXT: bnez a1, .LBB6_5
-; RV64-SLOW-NEXT: # %bb.1: # %else
+; RV64-SLOW-NEXT: .LBB6_1: # %else
; RV64-SLOW-NEXT: andi a1, a0, 2
; RV64-SLOW-NEXT: bnez a1, .LBB6_6
; RV64-SLOW-NEXT: .LBB6_2: # %else2
@@ -327,7 +324,7 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV64-SLOW-NEXT: .LBB6_3: # %else4
; RV64-SLOW-NEXT: andi a0, a0, 8
; RV64-SLOW-NEXT: bnez a0, .LBB6_8
-; RV64-SLOW-NEXT: .LBB6_4: # %else6
+; RV64-SLOW-NEXT: # %bb.4: # %else6
; RV64-SLOW-NEXT: ret
; RV64-SLOW-NEXT: .LBB6_5: # %cond.store
; RV64-SLOW-NEXT: vsetvli zero, zero, e16, m2, ta, ma
@@ -337,8 +334,7 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV64-SLOW-NEXT: srli a3, a1, 8
; RV64-SLOW-NEXT: sb a1, 0(a2)
; RV64-SLOW-NEXT: sb a3, 1(a2)
-; RV64-SLOW-NEXT: andi a1, a0, 2
-; RV64-SLOW-NEXT: beqz a1, .LBB6_2
+; RV64-SLOW-NEXT: j .LBB6_1
; RV64-SLOW-NEXT: .LBB6_6: # %cond.store1
; RV64-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64-SLOW-NEXT: vslidedown.vi v9, v8, 1
@@ -349,8 +345,7 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV64-SLOW-NEXT: srli a3, a1, 8
; RV64-SLOW-NEXT: sb a1, 0(a2)
; RV64-SLOW-NEXT: sb a3, 1(a2)
-; RV64-SLOW-NEXT: andi a1, a0, 4
-; RV64-SLOW-NEXT: beqz a1, .LBB6_3
+; RV64-SLOW-NEXT: j .LBB6_2
; RV64-SLOW-NEXT: .LBB6_7: # %cond.store3
; RV64-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64-SLOW-NEXT: vslidedown.vi v9, v8, 2
@@ -361,8 +356,7 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV64-SLOW-NEXT: srli a3, a1, 8
; RV64-SLOW-NEXT: sb a1, 0(a2)
; RV64-SLOW-NEXT: sb a3, 1(a2)
-; RV64-SLOW-NEXT: andi a0, a0, 8
-; RV64-SLOW-NEXT: beqz a0, .LBB6_4
+; RV64-SLOW-NEXT: j .LBB6_3
; RV64-SLOW-NEXT: .LBB6_8: # %cond.store5
; RV64-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64-SLOW-NEXT: vslidedown.vi v12, v8, 3
@@ -401,10 +395,10 @@ define void @mscatter_v2i32_align2(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m)
; RV32-SLOW-NEXT: vmv.x.s a0, v0
; RV32-SLOW-NEXT: andi a1, a0, 1
; RV32-SLOW-NEXT: bnez a1, .LBB7_3
-; RV32-SLOW-NEXT: # %bb.1: # %else
+; RV32-SLOW-NEXT: .LBB7_1: # %else
; RV32-SLOW-NEXT: andi a0, a0, 2
; RV32-SLOW-NEXT: bnez a0, .LBB7_4
-; RV32-SLOW-NEXT: .LBB7_2: # %else2
+; RV32-SLOW-NEXT: # %bb.2: # %else2
; RV32-SLOW-NEXT: ret
; RV32-SLOW-NEXT: .LBB7_3: # %cond.store
; RV32-SLOW-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -413,8 +407,7 @@ define void @mscatter_v2i32_align2(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m)
; RV32-SLOW-NEXT: srli a3, a1, 16
; RV32-SLOW-NEXT: sh a1, 0(a2)
; RV32-SLOW-NEXT: sh a3, 2(a2)
-; RV32-SLOW-NEXT: andi a0, a0, 2
-; RV32-SLOW-NEXT: beqz a0, .LBB7_2
+; RV32-SLOW-NEXT: j .LBB7_1
; RV32-SLOW-NEXT: .LBB7_4: # %cond.store1
; RV32-SLOW-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV32-SLOW-NEXT: vslidedown.vi v8, v8, 1
@@ -432,10 +425,10 @@ define void @mscatter_v2i32_align2(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m)
; RV64-SLOW-NEXT: vmv.x.s a0, v0
; RV64-SLOW-NEXT: andi a1, a0, 1
; RV64-SLOW-NEXT: bnez a1, .LBB7_3
-; RV64-SLOW-NEXT: # %bb.1: # %else
+; RV64-SLOW-NEXT: .LBB7_1: # %else
; RV64-SLOW-NEXT: andi a0, a0, 2
; RV64-SLOW-NEXT: bnez a0, .LBB7_4
-; RV64-SLOW-NEXT: .LBB7_2: # %else2
+; RV64-SLOW-NEXT: # %bb.2: # %else2
; RV64-SLOW-NEXT: ret
; RV64-SLOW-NEXT: .LBB7_3: # %cond.store
; RV64-SLOW-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -445,8 +438,7 @@ define void @mscatter_v2i32_align2(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m)
; RV64-SLOW-NEXT: srli a3, a1, 16
; RV64-SLOW-NEXT: sh a1, 0(a2)
; RV64-SLOW-NEXT: sh a3, 2(a2)
-; RV64-SLOW-NEXT: andi a0, a0, 2
-; RV64-SLOW-NEXT: beqz a0, .LBB7_2
+; RV64-SLOW-NEXT: j .LBB7_1
; RV64-SLOW-NEXT: .LBB7_4: # %cond.store1
; RV64-SLOW-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV64-SLOW-NEXT: vslidedown.vi v8, v8, 1
@@ -591,10 +583,10 @@ define void @masked_store_v2i32_align2(<2 x i32> %val, ptr %a, <2 x i32> %m) nou
; SLOW-NEXT: vmv.x.s a1, v9
; SLOW-NEXT: andi a2, a1, 1
; SLOW-NEXT: bnez a2, .LBB9_3
-; SLOW-NEXT: # %bb.1: # %else
+; SLOW-NEXT: .LBB9_1: # %else
; SLOW-NEXT: andi a1, a1, 2
; SLOW-NEXT: bnez a1, .LBB9_4
-; SLOW-NEXT: .LBB9_2: # %else2
+; SLOW-NEXT: # %bb.2: # %else2
; SLOW-NEXT: ret
; SLOW-NEXT: .LBB9_3: # %cond.store
; SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
@@ -602,8 +594,7 @@ define void @masked_store_v2i32_align2(<2 x i32> %val, ptr %a, <2 x i32> %m) nou
; SLOW-NEXT: srli a3, a2, 16
; SLOW-NEXT: sh a2, 0(a0)
; SLOW-NEXT: sh a3, 2(a0)
-; SLOW-NEXT: andi a1, a1, 2
-; SLOW-NEXT: beqz a1, .LBB9_2
+; SLOW-NEXT: j .LBB9_1
; SLOW-NEXT: .LBB9_4: # %cond.store1
; SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; SLOW-NEXT: vslidedown.vi v8, v8, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index 0640a6f3af257..9498446f5982b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -135,13 +135,13 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) {
; CHECK-NOV-NEXT: addiw a6, a3, -1
; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz
; CHECK-NOV-NEXT: bge a1, a6, .LBB3_10
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB3_1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a4, fa1, rtz
; CHECK-NOV-NEXT: bge a2, a6, .LBB3_11
; CHECK-NOV-NEXT: .LBB3_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
; CHECK-NOV-NEXT: bge a4, a6, .LBB3_12
-; CHECK-NOV-NEXT: .LBB3_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: bge a5, a6, .LBB3_13
; CHECK-NOV-NEXT: .LBB3_4: # %entry
; CHECK-NOV-NEXT: bge a3, a5, .LBB3_14
@@ -161,12 +161,10 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB3_10: # %entry
; CHECK-NOV-NEXT: mv a1, a6
-; CHECK-NOV-NEXT: fcvt.l.s a4, fa1, rtz
-; CHECK-NOV-NEXT: blt a2, a6, .LBB3_2
+; CHECK-NOV-NEXT: j .LBB3_1
; CHECK-NOV-NEXT: .LBB3_11: # %entry
; CHECK-NOV-NEXT: mv a2, a6
-; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
-; CHECK-NOV-NEXT: blt a4, a6, .LBB3_3
+; CHECK-NOV-NEXT: j .LBB3_2
; CHECK-NOV-NEXT: .LBB3_12: # %entry
; CHECK-NOV-NEXT: mv a4, a6
; CHECK-NOV-NEXT: blt a5, a6, .LBB3_4
@@ -208,13 +206,13 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) {
; CHECK-NOV-NEXT: srli a3, a3, 32
; CHECK-NOV-NEXT: fcvt.lu.s a2, fa1, rtz
; CHECK-NOV-NEXT: bgeu a1, a3, .LBB4_6
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB4_1: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a4, fa2, rtz
; CHECK-NOV-NEXT: bgeu a2, a3, .LBB4_7
; CHECK-NOV-NEXT: .LBB4_2: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a5, fa3, rtz
; CHECK-NOV-NEXT: bgeu a4, a3, .LBB4_8
-; CHECK-NOV-NEXT: .LBB4_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: bltu a5, a3, .LBB4_5
; CHECK-NOV-NEXT: .LBB4_4: # %entry
; CHECK-NOV-NEXT: mv a5, a3
@@ -226,12 +224,10 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB4_6: # %entry
; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: fcvt.lu.s a4, fa2, rtz
-; CHECK-NOV-NEXT: bltu a2, a3, .LBB4_2
+; CHECK-NOV-NEXT: j .LBB4_1
; CHECK-NOV-NEXT: .LBB4_7: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: fcvt.lu.s a5, fa3, rtz
-; CHECK-NOV-NEXT: bltu a4, a3, .LBB4_3
+; CHECK-NOV-NEXT: j .LBB4_2
; CHECK-NOV-NEXT: .LBB4_8: # %entry
; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: bgeu a5, a3, .LBB4_4
@@ -259,13 +255,13 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) {
; CHECK-NOV-NEXT: srli a4, a4, 32
; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz
; CHECK-NOV-NEXT: bge a1, a4, .LBB5_6
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB5_1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz
; CHECK-NOV-NEXT: bge a2, a4, .LBB5_7
; CHECK-NOV-NEXT: .LBB5_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
; CHECK-NOV-NEXT: bge a3, a4, .LBB5_8
-; CHECK-NOV-NEXT: .LBB5_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: blt a5, a4, .LBB5_5
; CHECK-NOV-NEXT: .LBB5_4: # %entry
; CHECK-NOV-NEXT: mv a5, a4
@@ -289,12 +285,10 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB5_6: # %entry
; CHECK-NOV-NEXT: mv a1, a4
-; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz
-; CHECK-NOV-NEXT: blt a2, a4, .LBB5_2
+; CHECK-NOV-NEXT: j .LBB5_1
; CHECK-NOV-NEXT: .LBB5_7: # %entry
; CHECK-NOV-NEXT: mv a2, a4
-; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
-; CHECK-NOV-NEXT: blt a3, a4, .LBB5_3
+; CHECK-NOV-NEXT: j .LBB5_2
; CHECK-NOV-NEXT: .LBB5_8: # %entry
; CHECK-NOV-NEXT: mv a3, a4
; CHECK-NOV-NEXT: bge a5, a4, .LBB5_4
@@ -362,13 +356,13 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: lui a1, 524288
; CHECK-NOV-NEXT: addiw a4, a1, -1
; CHECK-NOV-NEXT: bge a0, a4, .LBB6_10
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB6_1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a2, fs1, rtz
; CHECK-NOV-NEXT: bge s1, a4, .LBB6_11
; CHECK-NOV-NEXT: .LBB6_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
; CHECK-NOV-NEXT: bge a2, a4, .LBB6_12
-; CHECK-NOV-NEXT: .LBB6_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: bge a3, a4, .LBB6_13
; CHECK-NOV-NEXT: .LBB6_4: # %entry
; CHECK-NOV-NEXT: bge a1, a3, .LBB6_14
@@ -407,12 +401,10 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB6_10: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a4
-; CHECK-NOV-NEXT: fcvt.l.s a2, fs1, rtz
-; CHECK-NOV-NEXT: blt s1, a4, .LBB6_2
+; CHECK-NOV-NEXT: j .LBB6_1
; CHECK-NOV-NEXT: .LBB6_11: # %entry
; CHECK-NOV-NEXT: mv s1, a4
-; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
-; CHECK-NOV-NEXT: blt a2, a4, .LBB6_3
+; CHECK-NOV-NEXT: j .LBB6_2
; CHECK-NOV-NEXT: .LBB6_12: # %entry
; CHECK-NOV-NEXT: mv a2, a4
; CHECK-NOV-NEXT: blt a3, a4, .LBB6_4
@@ -562,13 +554,13 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: li a1, -1
; CHECK-NOV-NEXT: srli a1, a1, 32
; CHECK-NOV-NEXT: bgeu a0, a1, .LBB7_6
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB7_1: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a2, fs1, rtz
; CHECK-NOV-NEXT: bgeu s1, a1, .LBB7_7
; CHECK-NOV-NEXT: .LBB7_2: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a3, fs0, rtz
; CHECK-NOV-NEXT: bgeu a2, a1, .LBB7_8
-; CHECK-NOV-NEXT: .LBB7_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: bltu a3, a1, .LBB7_5
; CHECK-NOV-NEXT: .LBB7_4: # %entry
; CHECK-NOV-NEXT: mv a3, a1
@@ -599,12 +591,10 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB7_6: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a1
-; CHECK-NOV-NEXT: fcvt.lu.s a2, fs1, rtz
-; CHECK-NOV-NEXT: bltu s1, a1, .LBB7_2
+; CHECK-NOV-NEXT: j .LBB7_1
; CHECK-NOV-NEXT: .LBB7_7: # %entry
; CHECK-NOV-NEXT: mv s1, a1
-; CHECK-NOV-NEXT: fcvt.lu.s a3, fs0, rtz
-; CHECK-NOV-NEXT: bltu a2, a1, .LBB7_3
+; CHECK-NOV-NEXT: j .LBB7_2
; CHECK-NOV-NEXT: .LBB7_8: # %entry
; CHECK-NOV-NEXT: mv a2, a1
; CHECK-NOV-NEXT: bgeu a3, a1, .LBB7_4
@@ -740,13 +730,13 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: li a2, -1
; CHECK-NOV-NEXT: srli a2, a2, 32
; CHECK-NOV-NEXT: bge a0, a2, .LBB8_6
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB8_1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz
; CHECK-NOV-NEXT: bge s1, a2, .LBB8_7
; CHECK-NOV-NEXT: .LBB8_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
; CHECK-NOV-NEXT: bge a1, a2, .LBB8_8
-; CHECK-NOV-NEXT: .LBB8_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: blt a3, a2, .LBB8_5
; CHECK-NOV-NEXT: .LBB8_4: # %entry
; CHECK-NOV-NEXT: mv a3, a2
@@ -789,12 +779,10 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB8_6: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a2
-; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz
-; CHECK-NOV-NEXT: blt s1, a2, .LBB8_2
+; CHECK-NOV-NEXT: j .LBB8_1
; CHECK-NOV-NEXT: .LBB8_7: # %entry
; CHECK-NOV-NEXT: mv s1, a2
-; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
-; CHECK-NOV-NEXT: blt a1, a2, .LBB8_3
+; CHECK-NOV-NEXT: j .LBB8_2
; CHECK-NOV-NEXT: .LBB8_8: # %entry
; CHECK-NOV-NEXT: mv a1, a2
; CHECK-NOV-NEXT: bge a3, a2, .LBB8_4
@@ -905,7 +893,7 @@ define <2 x i16> @stest_f64i16(<2 x double> %x) {
; CHECK-NOV-NEXT: .LBB9_2: # %entry
; CHECK-NOV-NEXT: lui a2, 1048568
; CHECK-NOV-NEXT: bge a2, a0, .LBB9_7
-; CHECK-NOV-NEXT: .LBB9_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: bge a2, a1, .LBB9_8
; CHECK-NOV-NEXT: .LBB9_4: # %entry
; CHECK-NOV-NEXT: ret
@@ -914,8 +902,7 @@ define <2 x i16> @stest_f64i16(<2 x double> %x) {
; CHECK-NOV-NEXT: blt a0, a2, .LBB9_2
; CHECK-NOV-NEXT: .LBB9_6: # %entry
; CHECK-NOV-NEXT: mv a0, a2
-; CHECK-NOV-NEXT: lui a2, 1048568
-; CHECK-NOV-NEXT: blt a2, a0, .LBB9_3
+; CHECK-NOV-NEXT: j .LBB9_2
; CHECK-NOV-NEXT: .LBB9_7: # %entry
; CHECK-NOV-NEXT: lui a0, 1048568
; CHECK-NOV-NEXT: blt a2, a1, .LBB9_4
@@ -1023,18 +1010,18 @@ define <4 x i16> @stest_f32i16(<4 x float> %x) {
; CHECK-NOV-NEXT: addiw a5, a5, -1
; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz
; CHECK-NOV-NEXT: bge a1, a5, .LBB12_10
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB12_1: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz
; CHECK-NOV-NEXT: bge a2, a5, .LBB12_11
; CHECK-NOV-NEXT: .LBB12_2: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz
; CHECK-NOV-NEXT: bge a3, a5, .LBB12_12
-; CHECK-NOV-NEXT: .LBB12_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: bge a4, a5, .LBB12_13
; CHECK-NOV-NEXT: .LBB12_4: # %entry
; CHECK-NOV-NEXT: lui a5, 1048568
; CHECK-NOV-NEXT: bge a5, a4, .LBB12_14
-; CHECK-NOV-NEXT: .LBB12_5: # %entry
+; CHECK-NOV-NEXT: # %bb.5: # %entry
; CHECK-NOV-NEXT: bge a5, a3, .LBB12_15
; CHECK-NOV-NEXT: .LBB12_6: # %entry
; CHECK-NOV-NEXT: bge a5, a2, .LBB12_16
@@ -1050,19 +1037,16 @@ define <4 x i16> @stest_f32i16(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB12_10: # %entry
; CHECK-NOV-NEXT: mv a1, a5
-; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz
-; CHECK-NOV-NEXT: blt a2, a5, .LBB12_2
+; CHECK-NOV-NEXT: j .LBB12_1
; CHECK-NOV-NEXT: .LBB12_11: # %entry
; CHECK-NOV-NEXT: mv a2, a5
-; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz
-; CHECK-NOV-NEXT: blt a3, a5, .LBB12_3
+; CHECK-NOV-NEXT: j .LBB12_2
; CHECK-NOV-NEXT: .LBB12_12: # %entry
; CHECK-NOV-NEXT: mv a3, a5
; CHECK-NOV-NEXT: blt a4, a5, .LBB12_4
; CHECK-NOV-NEXT: .LBB12_13: # %entry
; CHECK-NOV-NEXT: mv a4, a5
-; CHECK-NOV-NEXT: lui a5, 1048568
-; CHECK-NOV-NEXT: blt a5, a4, .LBB12_5
+; CHECK-NOV-NEXT: j .LBB12_4
; CHECK-NOV-NEXT: .LBB12_14: # %entry
; CHECK-NOV-NEXT: lui a4, 1048568
; CHECK-NOV-NEXT: blt a5, a3, .LBB12_6
@@ -1099,13 +1083,13 @@ define <4 x i16> @utest_f32i16(<4 x float> %x) {
; CHECK-NOV-NEXT: addiw a3, a3, -1
; CHECK-NOV-NEXT: fcvt.wu.s a2, fa1, rtz
; CHECK-NOV-NEXT: bgeu a1, a3, .LBB13_6
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB13_1: # %entry
; CHECK-NOV-NEXT: fcvt.wu.s a4, fa2, rtz
; CHECK-NOV-NEXT: bgeu a2, a3, .LBB13_7
; CHECK-NOV-NEXT: .LBB13_2: # %entry
; CHECK-NOV-NEXT: fcvt.wu.s a5, fa3, rtz
; CHECK-NOV-NEXT: bgeu a4, a3, .LBB13_8
-; CHECK-NOV-NEXT: .LBB13_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: bltu a5, a3, .LBB13_5
; CHECK-NOV-NEXT: .LBB13_4: # %entry
; CHECK-NOV-NEXT: mv a5, a3
@@ -1117,12 +1101,10 @@ define <4 x i16> @utest_f32i16(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB13_6: # %entry
; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: fcvt.wu.s a4, fa2, rtz
-; CHECK-NOV-NEXT: bltu a2, a3, .LBB13_2
+; CHECK-NOV-NEXT: j .LBB13_1
; CHECK-NOV-NEXT: .LBB13_7: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: fcvt.wu.s a5, fa3, rtz
-; CHECK-NOV-NEXT: bltu a4, a3, .LBB13_3
+; CHECK-NOV-NEXT: j .LBB13_2
; CHECK-NOV-NEXT: .LBB13_8: # %entry
; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: bgeu a5, a3, .LBB13_4
@@ -1151,13 +1133,13 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) {
; CHECK-NOV-NEXT: addiw a4, a4, -1
; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz
; CHECK-NOV-NEXT: bge a1, a4, .LBB14_6
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB14_1: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz
; CHECK-NOV-NEXT: bge a2, a4, .LBB14_7
; CHECK-NOV-NEXT: .LBB14_2: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz
; CHECK-NOV-NEXT: bge a3, a4, .LBB14_8
-; CHECK-NOV-NEXT: .LBB14_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: blt a5, a4, .LBB14_5
; CHECK-NOV-NEXT: .LBB14_4: # %entry
; CHECK-NOV-NEXT: mv a5, a4
@@ -1181,12 +1163,10 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB14_6: # %entry
; CHECK-NOV-NEXT: mv a1, a4
-; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz
-; CHECK-NOV-NEXT: blt a2, a4, .LBB14_2
+; CHECK-NOV-NEXT: j .LBB14_1
; CHECK-NOV-NEXT: .LBB14_7: # %entry
; CHECK-NOV-NEXT: mv a2, a4
-; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz
-; CHECK-NOV-NEXT: blt a3, a4, .LBB14_3
+; CHECK-NOV-NEXT: j .LBB14_2
; CHECK-NOV-NEXT: .LBB14_8: # %entry
; CHECK-NOV-NEXT: mv a3, a4
; CHECK-NOV-NEXT: bge a5, a4, .LBB14_4
@@ -1285,7 +1265,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: lui a7, 8
; CHECK-NOV-NEXT: addiw a7, a7, -1
; CHECK-NOV-NEXT: bge a0, a7, .LBB15_18
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB15_1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
; CHECK-NOV-NEXT: bge s1, a7, .LBB15_19
; CHECK-NOV-NEXT: .LBB15_2: # %entry
@@ -1303,12 +1283,12 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB15_6: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a6, fs0, rtz
; CHECK-NOV-NEXT: bge a5, a7, .LBB15_24
-; CHECK-NOV-NEXT: .LBB15_7: # %entry
+; CHECK-NOV-NEXT: # %bb.7: # %entry
; CHECK-NOV-NEXT: bge a6, a7, .LBB15_25
; CHECK-NOV-NEXT: .LBB15_8: # %entry
; CHECK-NOV-NEXT: lui a7, 1048568
; CHECK-NOV-NEXT: bge a7, a6, .LBB15_26
-; CHECK-NOV-NEXT: .LBB15_9: # %entry
+; CHECK-NOV-NEXT: # %bb.9: # %entry
; CHECK-NOV-NEXT: bge a7, a5, .LBB15_27
; CHECK-NOV-NEXT: .LBB15_10: # %entry
; CHECK-NOV-NEXT: bge a7, a4, .LBB15_28
@@ -1371,35 +1351,28 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB15_18: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a7
-; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
-; CHECK-NOV-NEXT: blt s1, a7, .LBB15_2
+; CHECK-NOV-NEXT: j .LBB15_1
; CHECK-NOV-NEXT: .LBB15_19: # %entry
; CHECK-NOV-NEXT: mv s1, a7
-; CHECK-NOV-NEXT: fcvt.l.s a3, fs4, rtz
-; CHECK-NOV-NEXT: blt a1, a7, .LBB15_3
+; CHECK-NOV-NEXT: j .LBB15_2
; CHECK-NOV-NEXT: .LBB15_20: # %entry
; CHECK-NOV-NEXT: mv a1, a7
-; CHECK-NOV-NEXT: fcvt.l.s a2, fs3, rtz
-; CHECK-NOV-NEXT: blt a3, a7, .LBB15_4
+; CHECK-NOV-NEXT: j .LBB15_3
; CHECK-NOV-NEXT: .LBB15_21: # %entry
; CHECK-NOV-NEXT: mv a3, a7
-; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz
-; CHECK-NOV-NEXT: blt a2, a7, .LBB15_5
+; CHECK-NOV-NEXT: j .LBB15_4
; CHECK-NOV-NEXT: .LBB15_22: # %entry
; CHECK-NOV-NEXT: mv a2, a7
-; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz
-; CHECK-NOV-NEXT: blt a4, a7, .LBB15_6
+; CHECK-NOV-NEXT: j .LBB15_5
; CHECK-NOV-NEXT: .LBB15_23: # %entry
; CHECK-NOV-NEXT: mv a4, a7
-; CHECK-NOV-NEXT: fcvt.l.s a6, fs0, rtz
-; CHECK-NOV-NEXT: blt a5, a7, .LBB15_7
+; CHECK-NOV-NEXT: j .LBB15_6
; CHECK-NOV-NEXT: .LBB15_24: # %entry
; CHECK-NOV-NEXT: mv a5, a7
; CHECK-NOV-NEXT: blt a6, a7, .LBB15_8
; CHECK-NOV-NEXT: .LBB15_25: # %entry
; CHECK-NOV-NEXT: mv a6, a7
-; CHECK-NOV-NEXT: lui a7, 1048568
-; CHECK-NOV-NEXT: blt a7, a6, .LBB15_9
+; CHECK-NOV-NEXT: j .LBB15_8
; CHECK-NOV-NEXT: .LBB15_26: # %entry
; CHECK-NOV-NEXT: lui a6, 1048568
; CHECK-NOV-NEXT: blt a7, a5, .LBB15_10
@@ -1669,7 +1642,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: lui a3, 16
; CHECK-NOV-NEXT: addiw a3, a3, -1
; CHECK-NOV-NEXT: bgeu a0, a3, .LBB16_10
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB16_1: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a1, fs5, rtz
; CHECK-NOV-NEXT: bgeu s1, a3, .LBB16_11
; CHECK-NOV-NEXT: .LBB16_2: # %entry
@@ -1687,7 +1660,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB16_6: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a7, fs0, rtz
; CHECK-NOV-NEXT: bgeu a6, a3, .LBB16_16
-; CHECK-NOV-NEXT: .LBB16_7: # %entry
+; CHECK-NOV-NEXT: # %bb.7: # %entry
; CHECK-NOV-NEXT: bltu a7, a3, .LBB16_9
; CHECK-NOV-NEXT: .LBB16_8: # %entry
; CHECK-NOV-NEXT: mv a7, a3
@@ -1738,28 +1711,22 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB16_10: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a3
-; CHECK-NOV-NEXT: fcvt.lu.s a1, fs5, rtz
-; CHECK-NOV-NEXT: bltu s1, a3, .LBB16_2
+; CHECK-NOV-NEXT: j .LBB16_1
; CHECK-NOV-NEXT: .LBB16_11: # %entry
; CHECK-NOV-NEXT: mv s1, a3
-; CHECK-NOV-NEXT: fcvt.lu.s a2, fs4, rtz
-; CHECK-NOV-NEXT: bltu a1, a3, .LBB16_3
+; CHECK-NOV-NEXT: j .LBB16_2
; CHECK-NOV-NEXT: .LBB16_12: # %entry
; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: fcvt.lu.s a4, fs3, rtz
-; CHECK-NOV-NEXT: bltu a2, a3, .LBB16_4
+; CHECK-NOV-NEXT: j .LBB16_3
; CHECK-NOV-NEXT: .LBB16_13: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: fcvt.lu.s a5, fs2, rtz
-; CHECK-NOV-NEXT: bltu a4, a3, .LBB16_5
+; CHECK-NOV-NEXT: j .LBB16_4
; CHECK-NOV-NEXT: .LBB16_14: # %entry
; CHECK-NOV-NEXT: mv a4, a3
-; CHECK-NOV-NEXT: fcvt.lu.s a6, fs1, rtz
-; CHECK-NOV-NEXT: bltu a5, a3, .LBB16_6
+; CHECK-NOV-NEXT: j .LBB16_5
; CHECK-NOV-NEXT: .LBB16_15: # %entry
; CHECK-NOV-NEXT: mv a5, a3
-; CHECK-NOV-NEXT: fcvt.lu.s a7, fs0, rtz
-; CHECK-NOV-NEXT: bltu a6, a3, .LBB16_7
+; CHECK-NOV-NEXT: j .LBB16_6
; CHECK-NOV-NEXT: .LBB16_16: # %entry
; CHECK-NOV-NEXT: mv a6, a3
; CHECK-NOV-NEXT: bgeu a7, a3, .LBB16_8
@@ -2009,7 +1976,7 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: lui a4, 16
; CHECK-NOV-NEXT: addiw a4, a4, -1
; CHECK-NOV-NEXT: bge a0, a4, .LBB17_10
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB17_1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
; CHECK-NOV-NEXT: bge s1, a4, .LBB17_11
; CHECK-NOV-NEXT: .LBB17_2: # %entry
@@ -2027,7 +1994,7 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB17_6: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a7, fs0, rtz
; CHECK-NOV-NEXT: bge a6, a4, .LBB17_16
-; CHECK-NOV-NEXT: .LBB17_7: # %entry
+; CHECK-NOV-NEXT: # %bb.7: # %entry
; CHECK-NOV-NEXT: blt a7, a4, .LBB17_9
; CHECK-NOV-NEXT: .LBB17_8: # %entry
; CHECK-NOV-NEXT: mv a7, a4
@@ -2102,28 +2069,22 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB17_10: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a4
-; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
-; CHECK-NOV-NEXT: blt s1, a4, .LBB17_2
+; CHECK-NOV-NEXT: j .LBB17_1
; CHECK-NOV-NEXT: .LBB17_11: # %entry
; CHECK-NOV-NEXT: mv s1, a4
-; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
-; CHECK-NOV-NEXT: blt a1, a4, .LBB17_3
+; CHECK-NOV-NEXT: j .LBB17_2
; CHECK-NOV-NEXT: .LBB17_12: # %entry
; CHECK-NOV-NEXT: mv a1, a4
-; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz
-; CHECK-NOV-NEXT: blt a2, a4, .LBB17_4
+; CHECK-NOV-NEXT: j .LBB17_3
; CHECK-NOV-NEXT: .LBB17_13: # %entry
; CHECK-NOV-NEXT: mv a2, a4
-; CHECK-NOV-NEXT: fcvt.l.s a5, fs2, rtz
-; CHECK-NOV-NEXT: blt a3, a4, .LBB17_5
+; CHECK-NOV-NEXT: j .LBB17_4
; CHECK-NOV-NEXT: .LBB17_14: # %entry
; CHECK-NOV-NEXT: mv a3, a4
-; CHECK-NOV-NEXT: fcvt.l.s a6, fs1, rtz
-; CHECK-NOV-NEXT: blt a5, a4, .LBB17_6
+; CHECK-NOV-NEXT: j .LBB17_5
; CHECK-NOV-NEXT: .LBB17_15: # %entry
; CHECK-NOV-NEXT: mv a5, a4
-; CHECK-NOV-NEXT: fcvt.l.s a7, fs0, rtz
-; CHECK-NOV-NEXT: blt a6, a4, .LBB17_7
+; CHECK-NOV-NEXT: j .LBB17_6
; CHECK-NOV-NEXT: .LBB17_16: # %entry
; CHECK-NOV-NEXT: mv a6, a4
; CHECK-NOV-NEXT: bge a7, a4, .LBB17_8
@@ -2612,21 +2573,21 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
; CHECK-NOV-NEXT: neg a4, a4
; CHECK-NOV-NEXT: neg a3, a3
; CHECK-NOV-NEXT: and a3, a3, a0
-; CHECK-NOV-NEXT: beqz a1, .LBB20_7
+; CHECK-NOV-NEXT: beqz a1, .LBB20_8
; CHECK-NOV-NEXT: # %bb.5: # %entry
; CHECK-NOV-NEXT: sgtz a0, a1
+; CHECK-NOV-NEXT: .LBB20_6: # %entry
; CHECK-NOV-NEXT: and a1, a4, s0
-; CHECK-NOV-NEXT: bnez a2, .LBB20_8
-; CHECK-NOV-NEXT: .LBB20_6:
+; CHECK-NOV-NEXT: bnez a2, .LBB20_9
+; CHECK-NOV-NEXT: # %bb.7:
; CHECK-NOV-NEXT: snez a2, a1
-; CHECK-NOV-NEXT: j .LBB20_9
-; CHECK-NOV-NEXT: .LBB20_7:
+; CHECK-NOV-NEXT: j .LBB20_10
+; CHECK-NOV-NEXT: .LBB20_8:
; CHECK-NOV-NEXT: snez a0, a3
-; CHECK-NOV-NEXT: and a1, a4, s0
-; CHECK-NOV-NEXT: beqz a2, .LBB20_6
-; CHECK-NOV-NEXT: .LBB20_8: # %entry
-; CHECK-NOV-NEXT: sgtz a2, a2
+; CHECK-NOV-NEXT: j .LBB20_6
; CHECK-NOV-NEXT: .LBB20_9: # %entry
+; CHECK-NOV-NEXT: sgtz a2, a2
+; CHECK-NOV-NEXT: .LBB20_10: # %entry
; CHECK-NOV-NEXT: neg a2, a2
; CHECK-NOV-NEXT: neg a4, a0
; CHECK-NOV-NEXT: and a0, a2, a1
@@ -2680,21 +2641,21 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
; CHECK-V-NEXT: neg a3, a3
; CHECK-V-NEXT: neg a4, a4
; CHECK-V-NEXT: and a0, a4, a0
-; CHECK-V-NEXT: beqz a1, .LBB20_7
+; CHECK-V-NEXT: beqz a1, .LBB20_8
; CHECK-V-NEXT: # %bb.5: # %entry
; CHECK-V-NEXT: sgtz a1, a1
+; CHECK-V-NEXT: .LBB20_6: # %entry
; CHECK-V-NEXT: and a3, a3, s0
-; CHECK-V-NEXT: bnez a2, .LBB20_8
-; CHECK-V-NEXT: .LBB20_6:
+; CHECK-V-NEXT: bnez a2, .LBB20_9
+; CHECK-V-NEXT: # %bb.7:
; CHECK-V-NEXT: snez a2, a3
-; CHECK-V-NEXT: j .LBB20_9
-; CHECK-V-NEXT: .LBB20_7:
+; CHECK-V-NEXT: j .LBB20_10
+; CHECK-V-NEXT: .LBB20_8:
; CHECK-V-NEXT: snez a1, a0
-; CHECK-V-NEXT: and a3, a3, s0
-; CHECK-V-NEXT: beqz a2, .LBB20_6
-; CHECK-V-NEXT: .LBB20_8: # %entry
-; CHECK-V-NEXT: sgtz a2, a2
+; CHECK-V-NEXT: j .LBB20_6
; CHECK-V-NEXT: .LBB20_9: # %entry
+; CHECK-V-NEXT: sgtz a2, a2
+; CHECK-V-NEXT: .LBB20_10: # %entry
; CHECK-V-NEXT: neg a2, a2
; CHECK-V-NEXT: neg a1, a1
; CHECK-V-NEXT: and a2, a2, a3
@@ -3034,21 +2995,21 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
; CHECK-NOV-NEXT: neg a4, a4
; CHECK-NOV-NEXT: neg a3, a3
; CHECK-NOV-NEXT: and a3, a3, a0
-; CHECK-NOV-NEXT: beqz a1, .LBB23_7
+; CHECK-NOV-NEXT: beqz a1, .LBB23_8
; CHECK-NOV-NEXT: # %bb.5: # %entry
; CHECK-NOV-NEXT: sgtz a0, a1
+; CHECK-NOV-NEXT: .LBB23_6: # %entry
; CHECK-NOV-NEXT: and a1, a4, s0
-; CHECK-NOV-NEXT: bnez a2, .LBB23_8
-; CHECK-NOV-NEXT: .LBB23_6:
+; CHECK-NOV-NEXT: bnez a2, .LBB23_9
+; CHECK-NOV-NEXT: # %bb.7:
; CHECK-NOV-NEXT: snez a2, a1
-; CHECK-NOV-NEXT: j .LBB23_9
-; CHECK-NOV-NEXT: .LBB23_7:
+; CHECK-NOV-NEXT: j .LBB23_10
+; CHECK-NOV-NEXT: .LBB23_8:
; CHECK-NOV-NEXT: snez a0, a3
-; CHECK-NOV-NEXT: and a1, a4, s0
-; CHECK-NOV-NEXT: beqz a2, .LBB23_6
-; CHECK-NOV-NEXT: .LBB23_8: # %entry
-; CHECK-NOV-NEXT: sgtz a2, a2
+; CHECK-NOV-NEXT: j .LBB23_6
; CHECK-NOV-NEXT: .LBB23_9: # %entry
+; CHECK-NOV-NEXT: sgtz a2, a2
+; CHECK-NOV-NEXT: .LBB23_10: # %entry
; CHECK-NOV-NEXT: neg a2, a2
; CHECK-NOV-NEXT: neg a4, a0
; CHECK-NOV-NEXT: and a0, a2, a1
@@ -3102,21 +3063,21 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
; CHECK-V-NEXT: neg a3, a3
; CHECK-V-NEXT: neg a4, a4
; CHECK-V-NEXT: and a0, a4, a0
-; CHECK-V-NEXT: beqz a1, .LBB23_7
+; CHECK-V-NEXT: beqz a1, .LBB23_8
; CHECK-V-NEXT: # %bb.5: # %entry
; CHECK-V-NEXT: sgtz a1, a1
+; CHECK-V-NEXT: .LBB23_6: # %entry
; CHECK-V-NEXT: and a3, a3, s0
-; CHECK-V-NEXT: bnez a2, .LBB23_8
-; CHECK-V-NEXT: .LBB23_6:
+; CHECK-V-NEXT: bnez a2, .LBB23_9
+; CHECK-V-NEXT: # %bb.7:
; CHECK-V-NEXT: snez a2, a3
-; CHECK-V-NEXT: j .LBB23_9
-; CHECK-V-NEXT: .LBB23_7:
+; CHECK-V-NEXT: j .LBB23_10
+; CHECK-V-NEXT: .LBB23_8:
; CHECK-V-NEXT: snez a1, a0
-; CHECK-V-NEXT: and a3, a3, s0
-; CHECK-V-NEXT: beqz a2, .LBB23_6
-; CHECK-V-NEXT: .LBB23_8: # %entry
-; CHECK-V-NEXT: sgtz a2, a2
+; CHECK-V-NEXT: j .LBB23_6
; CHECK-V-NEXT: .LBB23_9: # %entry
+; CHECK-V-NEXT: sgtz a2, a2
+; CHECK-V-NEXT: .LBB23_10: # %entry
; CHECK-V-NEXT: neg a2, a2
; CHECK-V-NEXT: neg a1, a1
; CHECK-V-NEXT: and a2, a2, a3
@@ -3459,21 +3420,21 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
; CHECK-NOV-NEXT: neg a4, a4
; CHECK-NOV-NEXT: neg a3, a3
; CHECK-NOV-NEXT: and a3, a3, a0
-; CHECK-NOV-NEXT: beqz a1, .LBB26_7
+; CHECK-NOV-NEXT: beqz a1, .LBB26_8
; CHECK-NOV-NEXT: # %bb.5: # %entry
; CHECK-NOV-NEXT: sgtz a0, a1
+; CHECK-NOV-NEXT: .LBB26_6: # %entry
; CHECK-NOV-NEXT: and a1, a4, s0
-; CHECK-NOV-NEXT: bnez a2, .LBB26_8
-; CHECK-NOV-NEXT: .LBB26_6:
+; CHECK-NOV-NEXT: bnez a2, .LBB26_9
+; CHECK-NOV-NEXT: # %bb.7:
; CHECK-NOV-NEXT: snez a2, a1
-; CHECK-NOV-NEXT: j .LBB26_9
-; CHECK-NOV-NEXT: .LBB26_7:
+; CHECK-NOV-NEXT: j .LBB26_10
+; CHECK-NOV-NEXT: .LBB26_8:
; CHECK-NOV-NEXT: snez a0, a3
-; CHECK-NOV-NEXT: and a1, a4, s0
-; CHECK-NOV-NEXT: beqz a2, .LBB26_6
-; CHECK-NOV-NEXT: .LBB26_8: # %entry
-; CHECK-NOV-NEXT: sgtz a2, a2
+; CHECK-NOV-NEXT: j .LBB26_6
; CHECK-NOV-NEXT: .LBB26_9: # %entry
+; CHECK-NOV-NEXT: sgtz a2, a2
+; CHECK-NOV-NEXT: .LBB26_10: # %entry
; CHECK-NOV-NEXT: neg a2, a2
; CHECK-NOV-NEXT: neg a4, a0
; CHECK-NOV-NEXT: and a0, a2, a1
@@ -3525,21 +3486,21 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
; CHECK-V-NEXT: neg a3, a3
; CHECK-V-NEXT: neg a4, a4
; CHECK-V-NEXT: and a0, a4, a0
-; CHECK-V-NEXT: beqz a1, .LBB26_7
+; CHECK-V-NEXT: beqz a1, .LBB26_8
; CHECK-V-NEXT: # %bb.5: # %entry
; CHECK-V-NEXT: sgtz a1, a1
+; CHECK-V-NEXT: .LBB26_6: # %entry
; CHECK-V-NEXT: and a3, a3, s0
-; CHECK-V-NEXT: bnez a2, .LBB26_8
-; CHECK-V-NEXT: .LBB26_6:
+; CHECK-V-NEXT: bnez a2, .LBB26_9
+; CHECK-V-NEXT: # %bb.7:
; CHECK-V-NEXT: snez a2, a3
-; CHECK-V-NEXT: j .LBB26_9
-; CHECK-V-NEXT: .LBB26_7:
+; CHECK-V-NEXT: j .LBB26_10
+; CHECK-V-NEXT: .LBB26_8:
; CHECK-V-NEXT: snez a1, a0
-; CHECK-V-NEXT: and a3, a3, s0
-; CHECK-V-NEXT: beqz a2, .LBB26_6
-; CHECK-V-NEXT: .LBB26_8: # %entry
-; CHECK-V-NEXT: sgtz a2, a2
+; CHECK-V-NEXT: j .LBB26_6
; CHECK-V-NEXT: .LBB26_9: # %entry
+; CHECK-V-NEXT: sgtz a2, a2
+; CHECK-V-NEXT: .LBB26_10: # %entry
; CHECK-V-NEXT: neg a2, a2
; CHECK-V-NEXT: neg a1, a1
; CHECK-V-NEXT: and a2, a2, a3
@@ -3697,13 +3658,13 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: addiw a6, a3, -1
; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz
; CHECK-NOV-NEXT: bge a1, a6, .LBB30_10
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB30_1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a4, fa1, rtz
; CHECK-NOV-NEXT: bge a2, a6, .LBB30_11
; CHECK-NOV-NEXT: .LBB30_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
; CHECK-NOV-NEXT: bge a4, a6, .LBB30_12
-; CHECK-NOV-NEXT: .LBB30_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: bge a5, a6, .LBB30_13
; CHECK-NOV-NEXT: .LBB30_4: # %entry
; CHECK-NOV-NEXT: bge a3, a5, .LBB30_14
@@ -3723,12 +3684,10 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB30_10: # %entry
; CHECK-NOV-NEXT: mv a1, a6
-; CHECK-NOV-NEXT: fcvt.l.s a4, fa1, rtz
-; CHECK-NOV-NEXT: blt a2, a6, .LBB30_2
+; CHECK-NOV-NEXT: j .LBB30_1
; CHECK-NOV-NEXT: .LBB30_11: # %entry
; CHECK-NOV-NEXT: mv a2, a6
-; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
-; CHECK-NOV-NEXT: blt a4, a6, .LBB30_3
+; CHECK-NOV-NEXT: j .LBB30_2
; CHECK-NOV-NEXT: .LBB30_12: # %entry
; CHECK-NOV-NEXT: mv a4, a6
; CHECK-NOV-NEXT: blt a5, a6, .LBB30_4
@@ -3768,13 +3727,13 @@ define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: srli a3, a3, 32
; CHECK-NOV-NEXT: fcvt.lu.s a2, fa1, rtz
; CHECK-NOV-NEXT: bgeu a1, a3, .LBB31_6
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB31_1: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a4, fa2, rtz
; CHECK-NOV-NEXT: bgeu a2, a3, .LBB31_7
; CHECK-NOV-NEXT: .LBB31_2: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a5, fa3, rtz
; CHECK-NOV-NEXT: bgeu a4, a3, .LBB31_8
-; CHECK-NOV-NEXT: .LBB31_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: bltu a5, a3, .LBB31_5
; CHECK-NOV-NEXT: .LBB31_4: # %entry
; CHECK-NOV-NEXT: mv a5, a3
@@ -3786,12 +3745,10 @@ define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB31_6: # %entry
; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: fcvt.lu.s a4, fa2, rtz
-; CHECK-NOV-NEXT: bltu a2, a3, .LBB31_2
+; CHECK-NOV-NEXT: j .LBB31_1
; CHECK-NOV-NEXT: .LBB31_7: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: fcvt.lu.s a5, fa3, rtz
-; CHECK-NOV-NEXT: bltu a4, a3, .LBB31_3
+; CHECK-NOV-NEXT: j .LBB31_2
; CHECK-NOV-NEXT: .LBB31_8: # %entry
; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: bgeu a5, a3, .LBB31_4
@@ -3818,13 +3775,13 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: srli a3, a3, 32
; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz
; CHECK-NOV-NEXT: bge a1, a3, .LBB32_6
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB32_1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a4, fa1, rtz
; CHECK-NOV-NEXT: bge a2, a3, .LBB32_7
; CHECK-NOV-NEXT: .LBB32_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
; CHECK-NOV-NEXT: bge a4, a3, .LBB32_8
-; CHECK-NOV-NEXT: .LBB32_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: blt a5, a3, .LBB32_5
; CHECK-NOV-NEXT: .LBB32_4: # %entry
; CHECK-NOV-NEXT: mv a5, a3
@@ -3848,12 +3805,10 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB32_6: # %entry
; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: fcvt.l.s a4, fa1, rtz
-; CHECK-NOV-NEXT: blt a2, a3, .LBB32_2
+; CHECK-NOV-NEXT: j .LBB32_1
; CHECK-NOV-NEXT: .LBB32_7: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
-; CHECK-NOV-NEXT: blt a4, a3, .LBB32_3
+; CHECK-NOV-NEXT: j .LBB32_2
; CHECK-NOV-NEXT: .LBB32_8: # %entry
; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: bge a5, a3, .LBB32_4
@@ -3919,13 +3874,13 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: lui a1, 524288
; CHECK-NOV-NEXT: addiw a4, a1, -1
; CHECK-NOV-NEXT: bge a0, a4, .LBB33_10
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB33_1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a2, fs1, rtz
; CHECK-NOV-NEXT: bge s1, a4, .LBB33_11
; CHECK-NOV-NEXT: .LBB33_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
; CHECK-NOV-NEXT: bge a2, a4, .LBB33_12
-; CHECK-NOV-NEXT: .LBB33_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: bge a3, a4, .LBB33_13
; CHECK-NOV-NEXT: .LBB33_4: # %entry
; CHECK-NOV-NEXT: bge a1, a3, .LBB33_14
@@ -3964,12 +3919,10 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB33_10: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a4
-; CHECK-NOV-NEXT: fcvt.l.s a2, fs1, rtz
-; CHECK-NOV-NEXT: blt s1, a4, .LBB33_2
+; CHECK-NOV-NEXT: j .LBB33_1
; CHECK-NOV-NEXT: .LBB33_11: # %entry
; CHECK-NOV-NEXT: mv s1, a4
-; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
-; CHECK-NOV-NEXT: blt a2, a4, .LBB33_3
+; CHECK-NOV-NEXT: j .LBB33_2
; CHECK-NOV-NEXT: .LBB33_12: # %entry
; CHECK-NOV-NEXT: mv a2, a4
; CHECK-NOV-NEXT: blt a3, a4, .LBB33_4
@@ -4117,13 +4070,13 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: li a1, -1
; CHECK-NOV-NEXT: srli a1, a1, 32
; CHECK-NOV-NEXT: bgeu a0, a1, .LBB34_6
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB34_1: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a2, fs1, rtz
; CHECK-NOV-NEXT: bgeu s1, a1, .LBB34_7
; CHECK-NOV-NEXT: .LBB34_2: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a3, fs0, rtz
; CHECK-NOV-NEXT: bgeu a2, a1, .LBB34_8
-; CHECK-NOV-NEXT: .LBB34_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: bltu a3, a1, .LBB34_5
; CHECK-NOV-NEXT: .LBB34_4: # %entry
; CHECK-NOV-NEXT: mv a3, a1
@@ -4154,12 +4107,10 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB34_6: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a1
-; CHECK-NOV-NEXT: fcvt.lu.s a2, fs1, rtz
-; CHECK-NOV-NEXT: bltu s1, a1, .LBB34_2
+; CHECK-NOV-NEXT: j .LBB34_1
; CHECK-NOV-NEXT: .LBB34_7: # %entry
; CHECK-NOV-NEXT: mv s1, a1
-; CHECK-NOV-NEXT: fcvt.lu.s a3, fs0, rtz
-; CHECK-NOV-NEXT: bltu a2, a1, .LBB34_3
+; CHECK-NOV-NEXT: j .LBB34_2
; CHECK-NOV-NEXT: .LBB34_8: # %entry
; CHECK-NOV-NEXT: mv a2, a1
; CHECK-NOV-NEXT: bgeu a3, a1, .LBB34_4
@@ -4294,13 +4245,13 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: li a2, -1
; CHECK-NOV-NEXT: srli a2, a2, 32
; CHECK-NOV-NEXT: bge a0, a2, .LBB35_6
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB35_1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz
; CHECK-NOV-NEXT: bge s1, a2, .LBB35_7
; CHECK-NOV-NEXT: .LBB35_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
; CHECK-NOV-NEXT: bge a1, a2, .LBB35_8
-; CHECK-NOV-NEXT: .LBB35_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: blt a3, a2, .LBB35_5
; CHECK-NOV-NEXT: .LBB35_4: # %entry
; CHECK-NOV-NEXT: mv a3, a2
@@ -4343,12 +4294,10 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB35_6: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a2
-; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz
-; CHECK-NOV-NEXT: blt s1, a2, .LBB35_2
+; CHECK-NOV-NEXT: j .LBB35_1
; CHECK-NOV-NEXT: .LBB35_7: # %entry
; CHECK-NOV-NEXT: mv s1, a2
-; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
-; CHECK-NOV-NEXT: blt a1, a2, .LBB35_3
+; CHECK-NOV-NEXT: j .LBB35_2
; CHECK-NOV-NEXT: .LBB35_8: # %entry
; CHECK-NOV-NEXT: mv a1, a2
; CHECK-NOV-NEXT: bge a3, a2, .LBB35_4
@@ -4457,7 +4406,7 @@ define <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
; CHECK-NOV-NEXT: .LBB36_2: # %entry
; CHECK-NOV-NEXT: lui a2, 1048568
; CHECK-NOV-NEXT: bge a2, a0, .LBB36_7
-; CHECK-NOV-NEXT: .LBB36_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: bge a2, a1, .LBB36_8
; CHECK-NOV-NEXT: .LBB36_4: # %entry
; CHECK-NOV-NEXT: ret
@@ -4466,8 +4415,7 @@ define <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
; CHECK-NOV-NEXT: blt a0, a2, .LBB36_2
; CHECK-NOV-NEXT: .LBB36_6: # %entry
; CHECK-NOV-NEXT: mv a0, a2
-; CHECK-NOV-NEXT: lui a2, 1048568
-; CHECK-NOV-NEXT: blt a2, a0, .LBB36_3
+; CHECK-NOV-NEXT: j .LBB36_2
; CHECK-NOV-NEXT: .LBB36_7: # %entry
; CHECK-NOV-NEXT: lui a0, 1048568
; CHECK-NOV-NEXT: blt a2, a1, .LBB36_4
@@ -4570,18 +4518,18 @@ define <4 x i16> @stest_f32i16_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: addiw a5, a5, -1
; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz
; CHECK-NOV-NEXT: bge a1, a5, .LBB39_10
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB39_1: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz
; CHECK-NOV-NEXT: bge a2, a5, .LBB39_11
; CHECK-NOV-NEXT: .LBB39_2: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz
; CHECK-NOV-NEXT: bge a3, a5, .LBB39_12
-; CHECK-NOV-NEXT: .LBB39_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: bge a4, a5, .LBB39_13
; CHECK-NOV-NEXT: .LBB39_4: # %entry
; CHECK-NOV-NEXT: lui a5, 1048568
; CHECK-NOV-NEXT: bge a5, a4, .LBB39_14
-; CHECK-NOV-NEXT: .LBB39_5: # %entry
+; CHECK-NOV-NEXT: # %bb.5: # %entry
; CHECK-NOV-NEXT: bge a5, a3, .LBB39_15
; CHECK-NOV-NEXT: .LBB39_6: # %entry
; CHECK-NOV-NEXT: bge a5, a2, .LBB39_16
@@ -4597,19 +4545,16 @@ define <4 x i16> @stest_f32i16_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB39_10: # %entry
; CHECK-NOV-NEXT: mv a1, a5
-; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz
-; CHECK-NOV-NEXT: blt a2, a5, .LBB39_2
+; CHECK-NOV-NEXT: j .LBB39_1
; CHECK-NOV-NEXT: .LBB39_11: # %entry
; CHECK-NOV-NEXT: mv a2, a5
-; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz
-; CHECK-NOV-NEXT: blt a3, a5, .LBB39_3
+; CHECK-NOV-NEXT: j .LBB39_2
; CHECK-NOV-NEXT: .LBB39_12: # %entry
; CHECK-NOV-NEXT: mv a3, a5
; CHECK-NOV-NEXT: blt a4, a5, .LBB39_4
; CHECK-NOV-NEXT: .LBB39_13: # %entry
; CHECK-NOV-NEXT: mv a4, a5
-; CHECK-NOV-NEXT: lui a5, 1048568
-; CHECK-NOV-NEXT: blt a5, a4, .LBB39_5
+; CHECK-NOV-NEXT: j .LBB39_4
; CHECK-NOV-NEXT: .LBB39_14: # %entry
; CHECK-NOV-NEXT: lui a4, 1048568
; CHECK-NOV-NEXT: blt a5, a3, .LBB39_6
@@ -4644,13 +4589,13 @@ define <4 x i16> @utest_f32i16_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: addiw a3, a3, -1
; CHECK-NOV-NEXT: fcvt.wu.s a2, fa1, rtz
; CHECK-NOV-NEXT: bgeu a1, a3, .LBB40_6
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB40_1: # %entry
; CHECK-NOV-NEXT: fcvt.wu.s a4, fa2, rtz
; CHECK-NOV-NEXT: bgeu a2, a3, .LBB40_7
; CHECK-NOV-NEXT: .LBB40_2: # %entry
; CHECK-NOV-NEXT: fcvt.wu.s a5, fa3, rtz
; CHECK-NOV-NEXT: bgeu a4, a3, .LBB40_8
-; CHECK-NOV-NEXT: .LBB40_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: bltu a5, a3, .LBB40_5
; CHECK-NOV-NEXT: .LBB40_4: # %entry
; CHECK-NOV-NEXT: mv a5, a3
@@ -4662,12 +4607,10 @@ define <4 x i16> @utest_f32i16_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB40_6: # %entry
; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: fcvt.wu.s a4, fa2, rtz
-; CHECK-NOV-NEXT: bltu a2, a3, .LBB40_2
+; CHECK-NOV-NEXT: j .LBB40_1
; CHECK-NOV-NEXT: .LBB40_7: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: fcvt.wu.s a5, fa3, rtz
-; CHECK-NOV-NEXT: bltu a4, a3, .LBB40_3
+; CHECK-NOV-NEXT: j .LBB40_2
; CHECK-NOV-NEXT: .LBB40_8: # %entry
; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: bgeu a5, a3, .LBB40_4
@@ -4695,13 +4638,13 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: addiw a3, a3, -1
; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz
; CHECK-NOV-NEXT: bge a1, a3, .LBB41_6
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB41_1: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a4, fa1, rtz
; CHECK-NOV-NEXT: bge a2, a3, .LBB41_7
; CHECK-NOV-NEXT: .LBB41_2: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz
; CHECK-NOV-NEXT: bge a4, a3, .LBB41_8
-; CHECK-NOV-NEXT: .LBB41_3: # %entry
+; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: blt a5, a3, .LBB41_5
; CHECK-NOV-NEXT: .LBB41_4: # %entry
; CHECK-NOV-NEXT: mv a5, a3
@@ -4725,12 +4668,10 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB41_6: # %entry
; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: fcvt.w.s a4, fa1, rtz
-; CHECK-NOV-NEXT: blt a2, a3, .LBB41_2
+; CHECK-NOV-NEXT: j .LBB41_1
; CHECK-NOV-NEXT: .LBB41_7: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz
-; CHECK-NOV-NEXT: blt a4, a3, .LBB41_3
+; CHECK-NOV-NEXT: j .LBB41_2
; CHECK-NOV-NEXT: .LBB41_8: # %entry
; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: bge a5, a3, .LBB41_4
@@ -4827,7 +4768,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: lui a7, 8
; CHECK-NOV-NEXT: addiw a7, a7, -1
; CHECK-NOV-NEXT: bge a0, a7, .LBB42_18
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB42_1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
; CHECK-NOV-NEXT: bge s1, a7, .LBB42_19
; CHECK-NOV-NEXT: .LBB42_2: # %entry
@@ -4845,12 +4786,12 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB42_6: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a6, fs0, rtz
; CHECK-NOV-NEXT: bge a5, a7, .LBB42_24
-; CHECK-NOV-NEXT: .LBB42_7: # %entry
+; CHECK-NOV-NEXT: # %bb.7: # %entry
; CHECK-NOV-NEXT: bge a6, a7, .LBB42_25
; CHECK-NOV-NEXT: .LBB42_8: # %entry
; CHECK-NOV-NEXT: lui a7, 1048568
; CHECK-NOV-NEXT: bge a7, a6, .LBB42_26
-; CHECK-NOV-NEXT: .LBB42_9: # %entry
+; CHECK-NOV-NEXT: # %bb.9: # %entry
; CHECK-NOV-NEXT: bge a7, a5, .LBB42_27
; CHECK-NOV-NEXT: .LBB42_10: # %entry
; CHECK-NOV-NEXT: bge a7, a4, .LBB42_28
@@ -4913,35 +4854,28 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB42_18: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a7
-; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
-; CHECK-NOV-NEXT: blt s1, a7, .LBB42_2
+; CHECK-NOV-NEXT: j .LBB42_1
; CHECK-NOV-NEXT: .LBB42_19: # %entry
; CHECK-NOV-NEXT: mv s1, a7
-; CHECK-NOV-NEXT: fcvt.l.s a3, fs4, rtz
-; CHECK-NOV-NEXT: blt a1, a7, .LBB42_3
+; CHECK-NOV-NEXT: j .LBB42_2
; CHECK-NOV-NEXT: .LBB42_20: # %entry
; CHECK-NOV-NEXT: mv a1, a7
-; CHECK-NOV-NEXT: fcvt.l.s a2, fs3, rtz
-; CHECK-NOV-NEXT: blt a3, a7, .LBB42_4
+; CHECK-NOV-NEXT: j .LBB42_3
; CHECK-NOV-NEXT: .LBB42_21: # %entry
; CHECK-NOV-NEXT: mv a3, a7
-; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz
-; CHECK-NOV-NEXT: blt a2, a7, .LBB42_5
+; CHECK-NOV-NEXT: j .LBB42_4
; CHECK-NOV-NEXT: .LBB42_22: # %entry
; CHECK-NOV-NEXT: mv a2, a7
-; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz
-; CHECK-NOV-NEXT: blt a4, a7, .LBB42_6
+; CHECK-NOV-NEXT: j .LBB42_5
; CHECK-NOV-NEXT: .LBB42_23: # %entry
; CHECK-NOV-NEXT: mv a4, a7
-; CHECK-NOV-NEXT: fcvt.l.s a6, fs0, rtz
-; CHECK-NOV-NEXT: blt a5, a7, .LBB42_7
+; CHECK-NOV-NEXT: j .LBB42_6
; CHECK-NOV-NEXT: .LBB42_24: # %entry
; CHECK-NOV-NEXT: mv a5, a7
; CHECK-NOV-NEXT: blt a6, a7, .LBB42_8
; CHECK-NOV-NEXT: .LBB42_25: # %entry
; CHECK-NOV-NEXT: mv a6, a7
-; CHECK-NOV-NEXT: lui a7, 1048568
-; CHECK-NOV-NEXT: blt a7, a6, .LBB42_9
+; CHECK-NOV-NEXT: j .LBB42_8
; CHECK-NOV-NEXT: .LBB42_26: # %entry
; CHECK-NOV-NEXT: lui a6, 1048568
; CHECK-NOV-NEXT: blt a7, a5, .LBB42_10
@@ -5209,7 +5143,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: lui a3, 16
; CHECK-NOV-NEXT: addiw a3, a3, -1
; CHECK-NOV-NEXT: bgeu a0, a3, .LBB43_10
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB43_1: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a1, fs5, rtz
; CHECK-NOV-NEXT: bgeu s1, a3, .LBB43_11
; CHECK-NOV-NEXT: .LBB43_2: # %entry
@@ -5227,7 +5161,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB43_6: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a7, fs0, rtz
; CHECK-NOV-NEXT: bgeu a6, a3, .LBB43_16
-; CHECK-NOV-NEXT: .LBB43_7: # %entry
+; CHECK-NOV-NEXT: # %bb.7: # %entry
; CHECK-NOV-NEXT: bltu a7, a3, .LBB43_9
; CHECK-NOV-NEXT: .LBB43_8: # %entry
; CHECK-NOV-NEXT: mv a7, a3
@@ -5278,28 +5212,22 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB43_10: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a3
-; CHECK-NOV-NEXT: fcvt.lu.s a1, fs5, rtz
-; CHECK-NOV-NEXT: bltu s1, a3, .LBB43_2
+; CHECK-NOV-NEXT: j .LBB43_1
; CHECK-NOV-NEXT: .LBB43_11: # %entry
; CHECK-NOV-NEXT: mv s1, a3
-; CHECK-NOV-NEXT: fcvt.lu.s a2, fs4, rtz
-; CHECK-NOV-NEXT: bltu a1, a3, .LBB43_3
+; CHECK-NOV-NEXT: j .LBB43_2
; CHECK-NOV-NEXT: .LBB43_12: # %entry
; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: fcvt.lu.s a4, fs3, rtz
-; CHECK-NOV-NEXT: bltu a2, a3, .LBB43_4
+; CHECK-NOV-NEXT: j .LBB43_3
; CHECK-NOV-NEXT: .LBB43_13: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: fcvt.lu.s a5, fs2, rtz
-; CHECK-NOV-NEXT: bltu a4, a3, .LBB43_5
+; CHECK-NOV-NEXT: j .LBB43_4
; CHECK-NOV-NEXT: .LBB43_14: # %entry
; CHECK-NOV-NEXT: mv a4, a3
-; CHECK-NOV-NEXT: fcvt.lu.s a6, fs1, rtz
-; CHECK-NOV-NEXT: bltu a5, a3, .LBB43_6
+; CHECK-NOV-NEXT: j .LBB43_5
; CHECK-NOV-NEXT: .LBB43_15: # %entry
; CHECK-NOV-NEXT: mv a5, a3
-; CHECK-NOV-NEXT: fcvt.lu.s a7, fs0, rtz
-; CHECK-NOV-NEXT: bltu a6, a3, .LBB43_7
+; CHECK-NOV-NEXT: j .LBB43_6
; CHECK-NOV-NEXT: .LBB43_16: # %entry
; CHECK-NOV-NEXT: mv a6, a3
; CHECK-NOV-NEXT: bgeu a7, a3, .LBB43_8
@@ -5548,7 +5476,7 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: lui a3, 16
; CHECK-NOV-NEXT: addiw a3, a3, -1
; CHECK-NOV-NEXT: bge a0, a3, .LBB44_10
-; CHECK-NOV-NEXT: # %bb.1: # %entry
+; CHECK-NOV-NEXT: .LBB44_1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
; CHECK-NOV-NEXT: bge s1, a3, .LBB44_11
; CHECK-NOV-NEXT: .LBB44_2: # %entry
@@ -5566,7 +5494,7 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB44_6: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a7, fs0, rtz
; CHECK-NOV-NEXT: bge a6, a3, .LBB44_16
-; CHECK-NOV-NEXT: .LBB44_7: # %entry
+; CHECK-NOV-NEXT: # %bb.7: # %entry
; CHECK-NOV-NEXT: blt a7, a3, .LBB44_9
; CHECK-NOV-NEXT: .LBB44_8: # %entry
; CHECK-NOV-NEXT: mv a7, a3
@@ -5641,28 +5569,22 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB44_10: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a3
-; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
-; CHECK-NOV-NEXT: blt s1, a3, .LBB44_2
+; CHECK-NOV-NEXT: j .LBB44_1
; CHECK-NOV-NEXT: .LBB44_11: # %entry
; CHECK-NOV-NEXT: mv s1, a3
-; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
-; CHECK-NOV-NEXT: blt a1, a3, .LBB44_3
+; CHECK-NOV-NEXT: j .LBB44_2
; CHECK-NOV-NEXT: .LBB44_12: # %entry
; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: fcvt.l.s a4, fs3, rtz
-; CHECK-NOV-NEXT: blt a2, a3, .LBB44_4
+; CHECK-NOV-NEXT: j .LBB44_3
; CHECK-NOV-NEXT: .LBB44_13: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: fcvt.l.s a5, fs2, rtz
-; CHECK-NOV-NEXT: blt a4, a3, .LBB44_5
+; CHECK-NOV-NEXT: j .LBB44_4
; CHECK-NOV-NEXT: .LBB44_14: # %entry
; CHECK-NOV-NEXT: mv a4, a3
-; CHECK-NOV-NEXT: fcvt.l.s a6, fs1, rtz
-; CHECK-NOV-NEXT: blt a5, a3, .LBB44_6
+; CHECK-NOV-NEXT: j .LBB44_5
; CHECK-NOV-NEXT: .LBB44_15: # %entry
; CHECK-NOV-NEXT: mv a5, a3
-; CHECK-NOV-NEXT: fcvt.l.s a7, fs0, rtz
-; CHECK-NOV-NEXT: blt a6, a3, .LBB44_7
+; CHECK-NOV-NEXT: j .LBB44_6
; CHECK-NOV-NEXT: .LBB44_16: # %entry
; CHECK-NOV-NEXT: mv a6, a3
; CHECK-NOV-NEXT: bge a7, a3, .LBB44_8
@@ -5892,28 +5814,27 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
; CHECK-NOV-NEXT: # %bb.10: # %entry
; CHECK-NOV-NEXT: slti a5, a5, 0
; CHECK-NOV-NEXT: xori a5, a5, 1
-; CHECK-NOV-NEXT: and a1, a4, a1
-; CHECK-NOV-NEXT: beqz a5, .LBB45_12
-; CHECK-NOV-NEXT: j .LBB45_13
+; CHECK-NOV-NEXT: j .LBB45_12
; CHECK-NOV-NEXT: .LBB45_11:
; CHECK-NOV-NEXT: sltu a5, a3, s0
+; CHECK-NOV-NEXT: .LBB45_12:
; CHECK-NOV-NEXT: and a1, a4, a1
-; CHECK-NOV-NEXT: bnez a5, .LBB45_13
-; CHECK-NOV-NEXT: .LBB45_12: # %entry
+; CHECK-NOV-NEXT: bnez a5, .LBB45_14
+; CHECK-NOV-NEXT: # %bb.13: # %entry
; CHECK-NOV-NEXT: mv s0, a3
-; CHECK-NOV-NEXT: .LBB45_13: # %entry
-; CHECK-NOV-NEXT: beq a1, a0, .LBB45_15
-; CHECK-NOV-NEXT: # %bb.14: # %entry
+; CHECK-NOV-NEXT: .LBB45_14: # %entry
+; CHECK-NOV-NEXT: beq a1, a0, .LBB45_16
+; CHECK-NOV-NEXT: # %bb.15: # %entry
; CHECK-NOV-NEXT: slti a0, a1, 0
; CHECK-NOV-NEXT: xori a0, a0, 1
-; CHECK-NOV-NEXT: beqz a0, .LBB45_16
-; CHECK-NOV-NEXT: j .LBB45_17
-; CHECK-NOV-NEXT: .LBB45_15:
+; CHECK-NOV-NEXT: beqz a0, .LBB45_17
+; CHECK-NOV-NEXT: j .LBB45_18
+; CHECK-NOV-NEXT: .LBB45_16:
; CHECK-NOV-NEXT: sltu a0, a3, a2
-; CHECK-NOV-NEXT: bnez a0, .LBB45_17
-; CHECK-NOV-NEXT: .LBB45_16: # %entry
-; CHECK-NOV-NEXT: mv a2, a3
+; CHECK-NOV-NEXT: bnez a0, .LBB45_18
; CHECK-NOV-NEXT: .LBB45_17: # %entry
+; CHECK-NOV-NEXT: mv a2, a3
+; CHECK-NOV-NEXT: .LBB45_18: # %entry
; CHECK-NOV-NEXT: mv a0, s0
; CHECK-NOV-NEXT: mv a1, a2
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -5983,28 +5904,27 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
; CHECK-V-NEXT: # %bb.10: # %entry
; CHECK-V-NEXT: slti a5, a5, 0
; CHECK-V-NEXT: xori a5, a5, 1
-; CHECK-V-NEXT: and a1, a4, a1
-; CHECK-V-NEXT: beqz a5, .LBB45_12
-; CHECK-V-NEXT: j .LBB45_13
+; CHECK-V-NEXT: j .LBB45_12
; CHECK-V-NEXT: .LBB45_11:
; CHECK-V-NEXT: sltu a5, a3, s0
+; CHECK-V-NEXT: .LBB45_12:
; CHECK-V-NEXT: and a1, a4, a1
-; CHECK-V-NEXT: bnez a5, .LBB45_13
-; CHECK-V-NEXT: .LBB45_12: # %entry
+; CHECK-V-NEXT: bnez a5, .LBB45_14
+; CHECK-V-NEXT: # %bb.13: # %entry
; CHECK-V-NEXT: mv s0, a3
-; CHECK-V-NEXT: .LBB45_13: # %entry
-; CHECK-V-NEXT: beq a1, a2, .LBB45_15
-; CHECK-V-NEXT: # %bb.14: # %entry
+; CHECK-V-NEXT: .LBB45_14: # %entry
+; CHECK-V-NEXT: beq a1, a2, .LBB45_16
+; CHECK-V-NEXT: # %bb.15: # %entry
; CHECK-V-NEXT: slti a1, a1, 0
; CHECK-V-NEXT: xori a1, a1, 1
-; CHECK-V-NEXT: beqz a1, .LBB45_16
-; CHECK-V-NEXT: j .LBB45_17
-; CHECK-V-NEXT: .LBB45_15:
+; CHECK-V-NEXT: beqz a1, .LBB45_17
+; CHECK-V-NEXT: j .LBB45_18
+; CHECK-V-NEXT: .LBB45_16:
; CHECK-V-NEXT: sltu a1, a3, a0
-; CHECK-V-NEXT: bnez a1, .LBB45_17
-; CHECK-V-NEXT: .LBB45_16: # %entry
-; CHECK-V-NEXT: mv a0, a3
+; CHECK-V-NEXT: bnez a1, .LBB45_18
; CHECK-V-NEXT: .LBB45_17: # %entry
+; CHECK-V-NEXT: mv a0, a3
+; CHECK-V-NEXT: .LBB45_18: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vmv.s.x v9, s0
@@ -6296,28 +6216,27 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-NOV-NEXT: # %bb.10: # %entry
; CHECK-NOV-NEXT: slti a5, a5, 0
; CHECK-NOV-NEXT: xori a5, a5, 1
-; CHECK-NOV-NEXT: and a1, a4, a1
-; CHECK-NOV-NEXT: beqz a5, .LBB48_12
-; CHECK-NOV-NEXT: j .LBB48_13
+; CHECK-NOV-NEXT: j .LBB48_12
; CHECK-NOV-NEXT: .LBB48_11:
; CHECK-NOV-NEXT: sltu a5, a3, s0
+; CHECK-NOV-NEXT: .LBB48_12:
; CHECK-NOV-NEXT: and a1, a4, a1
-; CHECK-NOV-NEXT: bnez a5, .LBB48_13
-; CHECK-NOV-NEXT: .LBB48_12: # %entry
+; CHECK-NOV-NEXT: bnez a5, .LBB48_14
+; CHECK-NOV-NEXT: # %bb.13: # %entry
; CHECK-NOV-NEXT: mv s0, a3
-; CHECK-NOV-NEXT: .LBB48_13: # %entry
-; CHECK-NOV-NEXT: beq a1, a0, .LBB48_15
-; CHECK-NOV-NEXT: # %bb.14: # %entry
+; CHECK-NOV-NEXT: .LBB48_14: # %entry
+; CHECK-NOV-NEXT: beq a1, a0, .LBB48_16
+; CHECK-NOV-NEXT: # %bb.15: # %entry
; CHECK-NOV-NEXT: slti a0, a1, 0
; CHECK-NOV-NEXT: xori a0, a0, 1
-; CHECK-NOV-NEXT: beqz a0, .LBB48_16
-; CHECK-NOV-NEXT: j .LBB48_17
-; CHECK-NOV-NEXT: .LBB48_15:
+; CHECK-NOV-NEXT: beqz a0, .LBB48_17
+; CHECK-NOV-NEXT: j .LBB48_18
+; CHECK-NOV-NEXT: .LBB48_16:
; CHECK-NOV-NEXT: sltu a0, a3, a2
-; CHECK-NOV-NEXT: bnez a0, .LBB48_17
-; CHECK-NOV-NEXT: .LBB48_16: # %entry
-; CHECK-NOV-NEXT: mv a2, a3
+; CHECK-NOV-NEXT: bnez a0, .LBB48_18
; CHECK-NOV-NEXT: .LBB48_17: # %entry
+; CHECK-NOV-NEXT: mv a2, a3
+; CHECK-NOV-NEXT: .LBB48_18: # %entry
; CHECK-NOV-NEXT: mv a0, s0
; CHECK-NOV-NEXT: mv a1, a2
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -6387,28 +6306,27 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-V-NEXT: # %bb.10: # %entry
; CHECK-V-NEXT: slti a5, a5, 0
; CHECK-V-NEXT: xori a5, a5, 1
-; CHECK-V-NEXT: and a1, a4, a1
-; CHECK-V-NEXT: beqz a5, .LBB48_12
-; CHECK-V-NEXT: j .LBB48_13
+; CHECK-V-NEXT: j .LBB48_12
; CHECK-V-NEXT: .LBB48_11:
; CHECK-V-NEXT: sltu a5, a3, s0
+; CHECK-V-NEXT: .LBB48_12:
; CHECK-V-NEXT: and a1, a4, a1
-; CHECK-V-NEXT: bnez a5, .LBB48_13
-; CHECK-V-NEXT: .LBB48_12: # %entry
+; CHECK-V-NEXT: bnez a5, .LBB48_14
+; CHECK-V-NEXT: # %bb.13: # %entry
; CHECK-V-NEXT: mv s0, a3
-; CHECK-V-NEXT: .LBB48_13: # %entry
-; CHECK-V-NEXT: beq a1, a2, .LBB48_15
-; CHECK-V-NEXT: # %bb.14: # %entry
+; CHECK-V-NEXT: .LBB48_14: # %entry
+; CHECK-V-NEXT: beq a1, a2, .LBB48_16
+; CHECK-V-NEXT: # %bb.15: # %entry
; CHECK-V-NEXT: slti a1, a1, 0
; CHECK-V-NEXT: xori a1, a1, 1
-; CHECK-V-NEXT: beqz a1, .LBB48_16
-; CHECK-V-NEXT: j .LBB48_17
-; CHECK-V-NEXT: .LBB48_15:
+; CHECK-V-NEXT: beqz a1, .LBB48_17
+; CHECK-V-NEXT: j .LBB48_18
+; CHECK-V-NEXT: .LBB48_16:
; CHECK-V-NEXT: sltu a1, a3, a0
-; CHECK-V-NEXT: bnez a1, .LBB48_17
-; CHECK-V-NEXT: .LBB48_16: # %entry
-; CHECK-V-NEXT: mv a0, a3
+; CHECK-V-NEXT: bnez a1, .LBB48_18
; CHECK-V-NEXT: .LBB48_17: # %entry
+; CHECK-V-NEXT: mv a0, a3
+; CHECK-V-NEXT: .LBB48_18: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vmv.s.x v9, s0
@@ -6703,28 +6621,27 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-NOV-NEXT: # %bb.10: # %entry
; CHECK-NOV-NEXT: slti a5, a5, 0
; CHECK-NOV-NEXT: xori a5, a5, 1
-; CHECK-NOV-NEXT: and a1, a4, a1
-; CHECK-NOV-NEXT: beqz a5, .LBB51_12
-; CHECK-NOV-NEXT: j .LBB51_13
+; CHECK-NOV-NEXT: j .LBB51_12
; CHECK-NOV-NEXT: .LBB51_11:
; CHECK-NOV-NEXT: sltu a5, a3, s0
+; CHECK-NOV-NEXT: .LBB51_12:
; CHECK-NOV-NEXT: and a1, a4, a1
-; CHECK-NOV-NEXT: bnez a5, .LBB51_13
-; CHECK-NOV-NEXT: .LBB51_12: # %entry
+; CHECK-NOV-NEXT: bnez a5, .LBB51_14
+; CHECK-NOV-NEXT: # %bb.13: # %entry
; CHECK-NOV-NEXT: mv s0, a3
-; CHECK-NOV-NEXT: .LBB51_13: # %entry
-; CHECK-NOV-NEXT: beq a1, a0, .LBB51_15
-; CHECK-NOV-NEXT: # %bb.14: # %entry
+; CHECK-NOV-NEXT: .LBB51_14: # %entry
+; CHECK-NOV-NEXT: beq a1, a0, .LBB51_16
+; CHECK-NOV-NEXT: # %bb.15: # %entry
; CHECK-NOV-NEXT: slti a0, a1, 0
; CHECK-NOV-NEXT: xori a0, a0, 1
-; CHECK-NOV-NEXT: beqz a0, .LBB51_16
-; CHECK-NOV-NEXT: j .LBB51_17
-; CHECK-NOV-NEXT: .LBB51_15:
+; CHECK-NOV-NEXT: beqz a0, .LBB51_17
+; CHECK-NOV-NEXT: j .LBB51_18
+; CHECK-NOV-NEXT: .LBB51_16:
; CHECK-NOV-NEXT: sltu a0, a3, a2
-; CHECK-NOV-NEXT: bnez a0, .LBB51_17
-; CHECK-NOV-NEXT: .LBB51_16: # %entry
-; CHECK-NOV-NEXT: mv a2, a3
+; CHECK-NOV-NEXT: bnez a0, .LBB51_18
; CHECK-NOV-NEXT: .LBB51_17: # %entry
+; CHECK-NOV-NEXT: mv a2, a3
+; CHECK-NOV-NEXT: .LBB51_18: # %entry
; CHECK-NOV-NEXT: mv a0, s0
; CHECK-NOV-NEXT: mv a1, a2
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -6792,28 +6709,27 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-V-NEXT: # %bb.10: # %entry
; CHECK-V-NEXT: slti a5, a5, 0
; CHECK-V-NEXT: xori a5, a5, 1
-; CHECK-V-NEXT: and a1, a4, a1
-; CHECK-V-NEXT: beqz a5, .LBB51_12
-; CHECK-V-NEXT: j .LBB51_13
+; CHECK-V-NEXT: j .LBB51_12
; CHECK-V-NEXT: .LBB51_11:
; CHECK-V-NEXT: sltu a5, a3, s0
+; CHECK-V-NEXT: .LBB51_12:
; CHECK-V-NEXT: and a1, a4, a1
-; CHECK-V-NEXT: bnez a5, .LBB51_13
-; CHECK-V-NEXT: .LBB51_12: # %entry
+; CHECK-V-NEXT: bnez a5, .LBB51_14
+; CHECK-V-NEXT: # %bb.13: # %entry
; CHECK-V-NEXT: mv s0, a3
-; CHECK-V-NEXT: .LBB51_13: # %entry
-; CHECK-V-NEXT: beq a1, a2, .LBB51_15
-; CHECK-V-NEXT: # %bb.14: # %entry
+; CHECK-V-NEXT: .LBB51_14: # %entry
+; CHECK-V-NEXT: beq a1, a2, .LBB51_16
+; CHECK-V-NEXT: # %bb.15: # %entry
; CHECK-V-NEXT: slti a1, a1, 0
; CHECK-V-NEXT: xori a1, a1, 1
-; CHECK-V-NEXT: beqz a1, .LBB51_16
-; CHECK-V-NEXT: j .LBB51_17
-; CHECK-V-NEXT: .LBB51_15:
+; CHECK-V-NEXT: beqz a1, .LBB51_17
+; CHECK-V-NEXT: j .LBB51_18
+; CHECK-V-NEXT: .LBB51_16:
; CHECK-V-NEXT: sltu a1, a3, a0
-; CHECK-V-NEXT: bnez a1, .LBB51_17
-; CHECK-V-NEXT: .LBB51_16: # %entry
-; CHECK-V-NEXT: mv a0, a3
+; CHECK-V-NEXT: bnez a1, .LBB51_18
; CHECK-V-NEXT: .LBB51_17: # %entry
+; CHECK-V-NEXT: mv a0, a3
+; CHECK-V-NEXT: .LBB51_18: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vmv.s.x v9, a0
; CHECK-V-NEXT: vmv.s.x v8, s0
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr93587.ll b/llvm/test/CodeGen/RISCV/rvv/pr93587.ll
index c2998bf20fa0a..52cbb0959e116 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr93587.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr93587.ll
@@ -9,21 +9,11 @@ define i16 @f() {
; CHECK: # %bb.0: # %BB
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: j .LBB0_1
; CHECK-NEXT: .LBB0_1: # %BB1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: j .LBB0_1
-; CHECK-NEXT: # %bb.2: # %BB1
-; CHECK-NEXT: li a0, 0
-; CHECK-NEXT: bnez a0, .LBB0_1
-; CHECK-NEXT: j .LBB0_3
-; CHECK-NEXT: .LBB0_3: # %BB2
-; CHECK-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
-; CHECK-NEXT: ret
BB:
br label %BB1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcpop-shl-zext-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vcpop-shl-zext-opt.ll
index ed6b7f1e6efb8..60766bd7138ed 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vcpop-shl-zext-opt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vcpop-shl-zext-opt.ll
@@ -74,15 +74,15 @@ define dso_local void @test_store1(ptr nocapture noundef writeonly %dst, ptr noc
;
; RV64-LABEL: test_store1:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: blez a3, .LBB0_6
+; RV64-NEXT: blez a3, .LBB0_10
; RV64-NEXT: # %bb.1: # %for.body.preheader
; RV64-NEXT: li a5, 8
; RV64-NEXT: li a4, 0
-; RV64-NEXT: bltu a3, a5, .LBB0_7
+; RV64-NEXT: bltu a3, a5, .LBB0_6
; RV64-NEXT: # %bb.2: # %for.body.preheader
; RV64-NEXT: sub a5, a0, a1
; RV64-NEXT: li a6, 31
-; RV64-NEXT: bgeu a6, a5, .LBB0_7
+; RV64-NEXT: bgeu a6, a5, .LBB0_6
; RV64-NEXT: # %bb.3: # %vector.ph
; RV64-NEXT: lui a4, 524288
; RV64-NEXT: addiw a4, a4, -8
@@ -104,29 +104,29 @@ define dso_local void @test_store1(ptr nocapture noundef writeonly %dst, ptr noc
; RV64-NEXT: add a0, a0, a7
; RV64-NEXT: bne a6, a5, .LBB0_4
; RV64-NEXT: # %bb.5: # %middle.block
-; RV64-NEXT: bne a4, a3, .LBB0_7
-; RV64-NEXT: .LBB0_6: # %for.cond.cleanup
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB0_7: # %for.body.preheader13
+; RV64-NEXT: beq a4, a3, .LBB0_10
+; RV64-NEXT: .LBB0_6: # %for.body.preheader13
; RV64-NEXT: slli a4, a4, 2
; RV64-NEXT: slli a5, a3, 2
; RV64-NEXT: add a3, a1, a4
; RV64-NEXT: add a1, a1, a5
-; RV64-NEXT: j .LBB0_9
-; RV64-NEXT: .LBB0_8: # %for.inc
-; RV64-NEXT: # in Loop: Header=BB0_9 Depth=1
+; RV64-NEXT: j .LBB0_8
+; RV64-NEXT: .LBB0_7: # %for.inc
+; RV64-NEXT: # in Loop: Header=BB0_8 Depth=1
; RV64-NEXT: addi a3, a3, 4
-; RV64-NEXT: beq a3, a1, .LBB0_6
-; RV64-NEXT: .LBB0_9: # %for.body
+; RV64-NEXT: beq a3, a1, .LBB0_10
+; RV64-NEXT: .LBB0_8: # %for.body
; RV64-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-NEXT: lw a4, 0(a3)
-; RV64-NEXT: bge a4, a2, .LBB0_8
-; RV64-NEXT: # %bb.10: # %if.then
-; RV64-NEXT: # in Loop: Header=BB0_9 Depth=1
+; RV64-NEXT: bge a4, a2, .LBB0_7
+; RV64-NEXT: # %bb.9: # %if.then
+; RV64-NEXT: # in Loop: Header=BB0_8 Depth=1
; RV64-NEXT: addi a5, a0, 4
; RV64-NEXT: sw a4, 0(a0)
; RV64-NEXT: mv a0, a5
-; RV64-NEXT: j .LBB0_8
+; RV64-NEXT: j .LBB0_7
+; RV64-NEXT: .LBB0_10: # %for.cond.cleanup
+; RV64-NEXT: ret
entry:
%cmp8 = icmp sgt i32 %n, 0
br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index 35311bb156f44..6da5c594bf600 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -179,19 +179,19 @@ define <vscale x 1 x double> @test5(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a2, a1, 1
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: bnez a2, .LBB4_3
+; CHECK-NEXT: bnez a2, .LBB4_4
; CHECK-NEXT: # %bb.1: # %if.else
; CHECK-NEXT: vfsub.vv v9, v8, v9
+; CHECK-NEXT: .LBB4_2: # %if.else
; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: beqz a1, .LBB4_4
-; CHECK-NEXT: .LBB4_2: # %if.then4
+; CHECK-NEXT: beqz a1, .LBB4_5
+; CHECK-NEXT: # %bb.3: # %if.then4
; CHECK-NEXT: vfmul.vv v8, v9, v8
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB4_3: # %if.then
+; CHECK-NEXT: .LBB4_4: # %if.then
; CHECK-NEXT: vfadd.vv v9, v8, v9
-; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: bnez a1, .LBB4_2
-; CHECK-NEXT: .LBB4_4: # %if.else5
+; CHECK-NEXT: j .LBB4_2
+; CHECK-NEXT: .LBB4_5: # %if.else5
; CHECK-NEXT: vfmul.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
@@ -236,12 +236,13 @@ define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a2, a1, 1
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: bnez a2, .LBB5_3
+; CHECK-NEXT: bnez a2, .LBB5_4
; CHECK-NEXT: # %bb.1: # %if.else
; CHECK-NEXT: vfsub.vv v8, v8, v9
+; CHECK-NEXT: .LBB5_2: # %if.else
; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: beqz a1, .LBB5_4
-; CHECK-NEXT: .LBB5_2: # %if.then4
+; CHECK-NEXT: beqz a1, .LBB5_5
+; CHECK-NEXT: # %bb.3: # %if.then4
; CHECK-NEXT: lui a1, %hi(.LCPI5_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI5_0)(a1)
; CHECK-NEXT: lui a1, %hi(.LCPI5_1)
@@ -252,12 +253,11 @@ define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
; CHECK-NEXT: lui a1, %hi(scratch)
; CHECK-NEXT: addi a1, a1, %lo(scratch)
; CHECK-NEXT: vse64.v v9, (a1)
-; CHECK-NEXT: j .LBB5_5
-; CHECK-NEXT: .LBB5_3: # %if.then
+; CHECK-NEXT: j .LBB5_6
+; CHECK-NEXT: .LBB5_4: # %if.then
; CHECK-NEXT: vfadd.vv v8, v8, v9
-; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: bnez a1, .LBB5_2
-; CHECK-NEXT: .LBB5_4: # %if.else5
+; CHECK-NEXT: j .LBB5_2
+; CHECK-NEXT: .LBB5_5: # %if.else5
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: lui a1, 260096
; CHECK-NEXT: vmv.v.x v9, a1
@@ -267,7 +267,7 @@ define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
; CHECK-NEXT: lui a1, %hi(scratch)
; CHECK-NEXT: addi a1, a1, %lo(scratch)
; CHECK-NEXT: vse32.v v9, (a1)
-; CHECK-NEXT: .LBB5_5: # %if.end10
+; CHECK-NEXT: .LBB5_6: # %if.end10
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfmul.vv v8, v8, v8
; CHECK-NEXT: ret
@@ -1060,18 +1060,18 @@ define void @cross_block_avl_extend_backwards(i1 %cond, <vscale x 8 x i8> %v, pt
; CHECK-LABEL: cross_block_avl_extend_backwards:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: beqz a0, .LBB25_2
-; CHECK-NEXT: # %bb.1: # %exit
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB25_2: # %bar
+; CHECK-NEXT: bnez a0, .LBB25_3
+; CHECK-NEXT: # %bb.1: # %bar
; CHECK-NEXT: addi a2, a2, 1
-; CHECK-NEXT: .LBB25_3: # %foo
+; CHECK-NEXT: .LBB25_2: # %foo
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT: vse8.v v8, (a1)
-; CHECK-NEXT: j .LBB25_3
+; CHECK-NEXT: j .LBB25_2
+; CHECK-NEXT: .LBB25_3: # %exit
+; CHECK-NEXT: ret
entry:
br i1 %cond, label %exit, label %bar
foo:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
index 75f4b977a98b0..77119e50d5add 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
@@ -14,9 +14,9 @@
define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_dst_stride, ptr nocapture noundef readonly %src1, i32 noundef signext %i_src1_stride, ptr nocapture noundef readonly %src2, i32 noundef signext %i_src2_stride, i32 noundef signext %i_width, i32 noundef signext %i_height) {
; RV32-LABEL: test1:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: blez a7, .LBB0_17
+; RV32-NEXT: blez a7, .LBB0_13
; RV32-NEXT: # %bb.1: # %for.cond1.preheader.lr.ph
-; RV32-NEXT: blez a6, .LBB0_17
+; RV32-NEXT: blez a6, .LBB0_13
; RV32-NEXT: # %bb.2: # %for.cond1.preheader.us.preheader
; RV32-NEXT: addi t0, a7, -1
; RV32-NEXT: csrr t2, vlenb
@@ -26,9 +26,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: slli t1, t2, 1
; RV32-NEXT: li t6, 32
; RV32-NEXT: mv t0, t1
-; RV32-NEXT: # %bb.3: # %for.cond1.preheader.us.preheader
; RV32-NEXT: li t0, 32
-; RV32-NEXT: # %bb.4: # %for.cond1.preheader.us.preheader
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -37,22 +35,16 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: .cfi_offset s0, -4
; RV32-NEXT: .cfi_offset s1, -8
; RV32-NEXT: .cfi_offset s2, -12
-; RV32-NEXT: .cfi_remember_state
; RV32-NEXT: add t3, a0, t3
; RV32-NEXT: add t4, a2, t4
; RV32-NEXT: add s0, a4, t5
-; RV32-NEXT: bltu t6, t1, .LBB0_6
-; RV32-NEXT: # %bb.5: # %for.cond1.preheader.us.preheader
+; RV32-NEXT: bltu t6, t1, .LBB0_4
+; RV32-NEXT: # %bb.3: # %for.cond1.preheader.us.preheader
; RV32-NEXT: li t1, 32
-; RV32-NEXT: .LBB0_6: # %for.cond1.preheader.us.preheader
+; RV32-NEXT: .LBB0_4: # %for.cond1.preheader.us.preheader
; RV32-NEXT: add t3, t3, a6
; RV32-NEXT: add t5, t4, a6
; RV32-NEXT: add t4, s0, a6
-; RV32-NEXT: j .LBB0_8
-; RV32-NEXT: # %bb.7: # %for.cond1.preheader.us.preheader
-; RV32-NEXT: mv t1, t0
-; RV32-NEXT: .LBB0_8: # %for.cond1.preheader.us.preheader
-; RV32-NEXT: .cfi_restore_state
; RV32-NEXT: li t0, 0
; RV32-NEXT: sltu t5, a0, t5
; RV32-NEXT: sltu t6, a2, t3
@@ -71,25 +63,25 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: or t1, t1, t3
; RV32-NEXT: andi t1, t1, 1
; RV32-NEXT: slli t2, t2, 1
-; RV32-NEXT: j .LBB0_10
-; RV32-NEXT: .LBB0_9: # %for.cond1.for.cond.cleanup3_crit_edge.us
-; RV32-NEXT: # in Loop: Header=BB0_10 Depth=1
+; RV32-NEXT: j .LBB0_6
+; RV32-NEXT: .LBB0_5: # %for.cond1.for.cond.cleanup3_crit_edge.us
+; RV32-NEXT: # in Loop: Header=BB0_6 Depth=1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a2, a2, a3
; RV32-NEXT: addi t0, t0, 1
; RV32-NEXT: add a4, a4, a5
-; RV32-NEXT: beq t0, a7, .LBB0_16
-; RV32-NEXT: .LBB0_10: # %for.cond1.preheader.us
+; RV32-NEXT: beq t0, a7, .LBB0_12
+; RV32-NEXT: .LBB0_6: # %for.cond1.preheader.us
; RV32-NEXT: # =>This Loop Header: Depth=1
-; RV32-NEXT: # Child Loop BB0_13 Depth 2
-; RV32-NEXT: # Child Loop BB0_15 Depth 2
-; RV32-NEXT: beqz t1, .LBB0_12
-; RV32-NEXT: # %bb.11: # in Loop: Header=BB0_10 Depth=1
+; RV32-NEXT: # Child Loop BB0_9 Depth 2
+; RV32-NEXT: # Child Loop BB0_11 Depth 2
+; RV32-NEXT: beqz t1, .LBB0_8
+; RV32-NEXT: # %bb.7: # in Loop: Header=BB0_6 Depth=1
; RV32-NEXT: li t4, 0
; RV32-NEXT: li t3, 0
-; RV32-NEXT: j .LBB0_15
-; RV32-NEXT: .LBB0_12: # %vector.ph
-; RV32-NEXT: # in Loop: Header=BB0_10 Depth=1
+; RV32-NEXT: j .LBB0_11
+; RV32-NEXT: .LBB0_8: # %vector.ph
+; RV32-NEXT: # in Loop: Header=BB0_6 Depth=1
; RV32-NEXT: li t3, 0
; RV32-NEXT: neg t4, t2
; RV32-NEXT: and t4, t4, a6
@@ -97,8 +89,8 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: li t6, 0
; RV32-NEXT: li t5, 0
; RV32-NEXT: vsetvli s0, zero, e8, m2, ta, ma
-; RV32-NEXT: .LBB0_13: # %vector.body
-; RV32-NEXT: # Parent Loop BB0_10 Depth=1
+; RV32-NEXT: .LBB0_9: # %vector.body
+; RV32-NEXT: # Parent Loop BB0_6 Depth=1
; RV32-NEXT: # => This Inner Loop Header: Depth=2
; RV32-NEXT: add s0, a2, t6
; RV32-NEXT: add s1, a4, t6
@@ -113,12 +105,12 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: or s2, t6, t5
; RV32-NEXT: vs2r.v v8, (s0)
; RV32-NEXT: mv t6, s1
-; RV32-NEXT: bnez s2, .LBB0_13
-; RV32-NEXT: # %bb.14: # %middle.block
-; RV32-NEXT: # in Loop: Header=BB0_10 Depth=1
-; RV32-NEXT: beq t4, a6, .LBB0_9
-; RV32-NEXT: .LBB0_15: # %for.body4.us
-; RV32-NEXT: # Parent Loop BB0_10 Depth=1
+; RV32-NEXT: bnez s2, .LBB0_9
+; RV32-NEXT: # %bb.10: # %middle.block
+; RV32-NEXT: # in Loop: Header=BB0_6 Depth=1
+; RV32-NEXT: beq t4, a6, .LBB0_5
+; RV32-NEXT: .LBB0_11: # %for.body4.us
+; RV32-NEXT: # Parent Loop BB0_6 Depth=1
; RV32-NEXT: # => This Inner Loop Header: Depth=2
; RV32-NEXT: add t5, a2, t4
; RV32-NEXT: add t6, a4, t4
@@ -134,9 +126,9 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: srli t5, t5, 1
; RV32-NEXT: or t6, t6, t3
; RV32-NEXT: sb t5, 0(s0)
-; RV32-NEXT: bnez t6, .LBB0_15
-; RV32-NEXT: j .LBB0_9
-; RV32-NEXT: .LBB0_16:
+; RV32-NEXT: bnez t6, .LBB0_11
+; RV32-NEXT: j .LBB0_5
+; RV32-NEXT: .LBB0_12:
; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
@@ -145,7 +137,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: .cfi_restore s2
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
-; RV32-NEXT: .LBB0_17: # %for.cond.cleanup
+; RV32-NEXT: .LBB0_13: # %for.cond.cleanup
; RV32-NEXT: ret
;
; RV64P670-LABEL: test1:
diff --git a/llvm/test/CodeGen/RISCV/sadd_sat.ll b/llvm/test/CodeGen/RISCV/sadd_sat.ll
index ab03ccc4ba590..d759db93ae481 100644
--- a/llvm/test/CodeGen/RISCV/sadd_sat.ll
+++ b/llvm/test/CodeGen/RISCV/sadd_sat.ll
@@ -116,15 +116,14 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
; RV32I-NEXT: lui a1, 8
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: bge a0, a1, .LBB2_3
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: .LBB2_1:
; RV32I-NEXT: lui a1, 1048568
; RV32I-NEXT: bge a1, a0, .LBB2_4
-; RV32I-NEXT: .LBB2_2:
+; RV32I-NEXT: # %bb.2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB2_3:
; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: lui a1, 1048568
-; RV32I-NEXT: blt a1, a0, .LBB2_2
+; RV32I-NEXT: j .LBB2_1
; RV32I-NEXT: .LBB2_4:
; RV32I-NEXT: lui a0, 1048568
; RV32I-NEXT: ret
@@ -135,15 +134,14 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
; RV64I-NEXT: lui a1, 8
; RV64I-NEXT: addiw a1, a1, -1
; RV64I-NEXT: bge a0, a1, .LBB2_3
-; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: .LBB2_1:
; RV64I-NEXT: lui a1, 1048568
; RV64I-NEXT: bge a1, a0, .LBB2_4
-; RV64I-NEXT: .LBB2_2:
+; RV64I-NEXT: # %bb.2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB2_3:
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: lui a1, 1048568
-; RV64I-NEXT: blt a1, a0, .LBB2_2
+; RV64I-NEXT: j .LBB2_1
; RV64I-NEXT: .LBB2_4:
; RV64I-NEXT: lui a0, 1048568
; RV64I-NEXT: ret
@@ -177,15 +175,14 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: li a1, 127
; RV32I-NEXT: bge a0, a1, .LBB3_3
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: .LBB3_1:
; RV32I-NEXT: li a1, -128
; RV32I-NEXT: bge a1, a0, .LBB3_4
-; RV32I-NEXT: .LBB3_2:
+; RV32I-NEXT: # %bb.2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB3_3:
; RV32I-NEXT: li a0, 127
-; RV32I-NEXT: li a1, -128
-; RV32I-NEXT: blt a1, a0, .LBB3_2
+; RV32I-NEXT: j .LBB3_1
; RV32I-NEXT: .LBB3_4:
; RV32I-NEXT: li a0, -128
; RV32I-NEXT: ret
@@ -195,15 +192,14 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: li a1, 127
; RV64I-NEXT: bge a0, a1, .LBB3_3
-; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: .LBB3_1:
; RV64I-NEXT: li a1, -128
; RV64I-NEXT: bge a1, a0, .LBB3_4
-; RV64I-NEXT: .LBB3_2:
+; RV64I-NEXT: # %bb.2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB3_3:
; RV64I-NEXT: li a0, 127
-; RV64I-NEXT: li a1, -128
-; RV64I-NEXT: blt a1, a0, .LBB3_2
+; RV64I-NEXT: j .LBB3_1
; RV64I-NEXT: .LBB3_4:
; RV64I-NEXT: li a0, -128
; RV64I-NEXT: ret
@@ -235,15 +231,14 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: li a1, 7
; RV32I-NEXT: bge a0, a1, .LBB4_3
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: .LBB4_1:
; RV32I-NEXT: li a1, -8
; RV32I-NEXT: bge a1, a0, .LBB4_4
-; RV32I-NEXT: .LBB4_2:
+; RV32I-NEXT: # %bb.2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB4_3:
; RV32I-NEXT: li a0, 7
-; RV32I-NEXT: li a1, -8
-; RV32I-NEXT: blt a1, a0, .LBB4_2
+; RV32I-NEXT: j .LBB4_1
; RV32I-NEXT: .LBB4_4:
; RV32I-NEXT: li a0, -8
; RV32I-NEXT: ret
@@ -253,15 +248,14 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: li a1, 7
; RV64I-NEXT: bge a0, a1, .LBB4_3
-; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: .LBB4_1:
; RV64I-NEXT: li a1, -8
; RV64I-NEXT: bge a1, a0, .LBB4_4
-; RV64I-NEXT: .LBB4_2:
+; RV64I-NEXT: # %bb.2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB4_3:
; RV64I-NEXT: li a0, 7
-; RV64I-NEXT: li a1, -8
-; RV64I-NEXT: blt a1, a0, .LBB4_2
+; RV64I-NEXT: j .LBB4_1
; RV64I-NEXT: .LBB4_4:
; RV64I-NEXT: li a0, -8
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll b/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll
index abcf3379d0a6e..d0ef17657da5a 100644
--- a/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll
+++ b/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll
@@ -126,15 +126,14 @@ define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind {
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: addi a1, a2, -1
; RV32I-NEXT: bge a0, a1, .LBB2_3
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: .LBB2_1:
; RV32I-NEXT: lui a1, 1048568
; RV32I-NEXT: bge a1, a0, .LBB2_4
-; RV32I-NEXT: .LBB2_2:
+; RV32I-NEXT: # %bb.2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB2_3:
; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: lui a1, 1048568
-; RV32I-NEXT: blt a1, a0, .LBB2_2
+; RV32I-NEXT: j .LBB2_1
; RV32I-NEXT: .LBB2_4:
; RV32I-NEXT: lui a0, 1048568
; RV32I-NEXT: ret
@@ -150,15 +149,14 @@ define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind {
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: addiw a1, a2, -1
; RV64I-NEXT: bge a0, a1, .LBB2_3
-; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: .LBB2_1:
; RV64I-NEXT: lui a1, 1048568
; RV64I-NEXT: bge a1, a0, .LBB2_4
-; RV64I-NEXT: .LBB2_2:
+; RV64I-NEXT: # %bb.2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB2_3:
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: lui a1, 1048568
-; RV64I-NEXT: blt a1, a0, .LBB2_2
+; RV64I-NEXT: j .LBB2_1
; RV64I-NEXT: .LBB2_4:
; RV64I-NEXT: lui a0, 1048568
; RV64I-NEXT: ret
@@ -204,15 +202,14 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: li a1, 127
; RV32I-NEXT: bge a0, a1, .LBB3_3
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: .LBB3_1:
; RV32I-NEXT: li a1, -128
; RV32I-NEXT: bge a1, a0, .LBB3_4
-; RV32I-NEXT: .LBB3_2:
+; RV32I-NEXT: # %bb.2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB3_3:
; RV32I-NEXT: li a0, 127
-; RV32I-NEXT: li a1, -128
-; RV32I-NEXT: blt a1, a0, .LBB3_2
+; RV32I-NEXT: j .LBB3_1
; RV32I-NEXT: .LBB3_4:
; RV32I-NEXT: li a0, -128
; RV32I-NEXT: ret
@@ -227,15 +224,14 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: li a1, 127
; RV64I-NEXT: bge a0, a1, .LBB3_3
-; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: .LBB3_1:
; RV64I-NEXT: li a1, -128
; RV64I-NEXT: bge a1, a0, .LBB3_4
-; RV64I-NEXT: .LBB3_2:
+; RV64I-NEXT: # %bb.2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB3_3:
; RV64I-NEXT: li a0, 127
-; RV64I-NEXT: li a1, -128
-; RV64I-NEXT: blt a1, a0, .LBB3_2
+; RV64I-NEXT: j .LBB3_1
; RV64I-NEXT: .LBB3_4:
; RV64I-NEXT: li a0, -128
; RV64I-NEXT: ret
@@ -279,15 +275,14 @@ define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: li a1, 7
; RV32I-NEXT: bge a0, a1, .LBB4_3
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: .LBB4_1:
; RV32I-NEXT: li a1, -8
; RV32I-NEXT: bge a1, a0, .LBB4_4
-; RV32I-NEXT: .LBB4_2:
+; RV32I-NEXT: # %bb.2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB4_3:
; RV32I-NEXT: li a0, 7
-; RV32I-NEXT: li a1, -8
-; RV32I-NEXT: blt a1, a0, .LBB4_2
+; RV32I-NEXT: j .LBB4_1
; RV32I-NEXT: .LBB4_4:
; RV32I-NEXT: li a0, -8
; RV32I-NEXT: ret
@@ -302,15 +297,14 @@ define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: li a1, 7
; RV64I-NEXT: bge a0, a1, .LBB4_3
-; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: .LBB4_1:
; RV64I-NEXT: li a1, -8
; RV64I-NEXT: bge a1, a0, .LBB4_4
-; RV64I-NEXT: .LBB4_2:
+; RV64I-NEXT: # %bb.2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB4_3:
; RV64I-NEXT: li a0, 7
-; RV64I-NEXT: li a1, -8
-; RV64I-NEXT: blt a1, a0, .LBB4_2
+; RV64I-NEXT: j .LBB4_1
; RV64I-NEXT: .LBB4_4:
; RV64I-NEXT: li a0, -8
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/setcc-logic.ll b/llvm/test/CodeGen/RISCV/setcc-logic.ll
index fabb573e98d2a..3d72c55a510eb 100644
--- a/llvm/test/CodeGen/RISCV/setcc-logic.ll
+++ b/llvm/test/CodeGen/RISCV/setcc-logic.ll
@@ -562,23 +562,23 @@ define void @and_ule_ne(i32 signext %0, i32 signext %1, i32 signext %2, i32 sign
define void @or_sge_eq(i32 signext %0, i32 signext %1, i32 signext %2, i32 signext %3) {
; RV32I-LABEL: or_sge_eq:
; RV32I: # %bb.0:
-; RV32I-NEXT: bge a0, a1, .LBB21_3
+; RV32I-NEXT: bge a0, a1, .LBB21_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: beq a2, a3, .LBB21_3
-; RV32I-NEXT: # %bb.2:
-; RV32I-NEXT: tail bar
-; RV32I-NEXT: .LBB21_3:
+; RV32I-NEXT: bne a2, a3, .LBB21_3
+; RV32I-NEXT: .LBB21_2:
; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB21_3:
+; RV32I-NEXT: tail bar
;
; RV64I-LABEL: or_sge_eq:
; RV64I: # %bb.0:
-; RV64I-NEXT: bge a0, a1, .LBB21_3
+; RV64I-NEXT: bge a0, a1, .LBB21_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: beq a2, a3, .LBB21_3
-; RV64I-NEXT: # %bb.2:
-; RV64I-NEXT: tail bar
-; RV64I-NEXT: .LBB21_3:
+; RV64I-NEXT: bne a2, a3, .LBB21_3
+; RV64I-NEXT: .LBB21_2:
; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB21_3:
+; RV64I-NEXT: tail bar
%5 = icmp sge i32 %0, %1
%6 = icmp eq i32 %2, %3
%7 = or i1 %5, %6
@@ -595,23 +595,23 @@ define void @or_sge_eq(i32 signext %0, i32 signext %1, i32 signext %2, i32 signe
define void @or_sle_eq(i32 signext %0, i32 signext %1, i32 signext %2, i32 signext %3) {
; RV32I-LABEL: or_sle_eq:
; RV32I: # %bb.0:
-; RV32I-NEXT: bge a1, a0, .LBB22_3
+; RV32I-NEXT: bge a1, a0, .LBB22_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: beq a2, a3, .LBB22_3
-; RV32I-NEXT: # %bb.2:
-; RV32I-NEXT: tail bar
-; RV32I-NEXT: .LBB22_3:
+; RV32I-NEXT: bne a2, a3, .LBB22_3
+; RV32I-NEXT: .LBB22_2:
; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB22_3:
+; RV32I-NEXT: tail bar
;
; RV64I-LABEL: or_sle_eq:
; RV64I: # %bb.0:
-; RV64I-NEXT: bge a1, a0, .LBB22_3
+; RV64I-NEXT: bge a1, a0, .LBB22_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: beq a2, a3, .LBB22_3
-; RV64I-NEXT: # %bb.2:
-; RV64I-NEXT: tail bar
-; RV64I-NEXT: .LBB22_3:
+; RV64I-NEXT: bne a2, a3, .LBB22_3
+; RV64I-NEXT: .LBB22_2:
; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB22_3:
+; RV64I-NEXT: tail bar
%5 = icmp sle i32 %0, %1
%6 = icmp eq i32 %2, %3
%7 = or i1 %5, %6
@@ -628,23 +628,23 @@ define void @or_sle_eq(i32 signext %0, i32 signext %1, i32 signext %2, i32 signe
define void @or_uge_eq(i32 signext %0, i32 signext %1, i32 signext %2, i32 signext %3) {
; RV32I-LABEL: or_uge_eq:
; RV32I: # %bb.0:
-; RV32I-NEXT: bgeu a0, a1, .LBB23_3
+; RV32I-NEXT: bgeu a0, a1, .LBB23_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: beq a2, a3, .LBB23_3
-; RV32I-NEXT: # %bb.2:
-; RV32I-NEXT: tail bar
-; RV32I-NEXT: .LBB23_3:
+; RV32I-NEXT: bne a2, a3, .LBB23_3
+; RV32I-NEXT: .LBB23_2:
; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB23_3:
+; RV32I-NEXT: tail bar
;
; RV64I-LABEL: or_uge_eq:
; RV64I: # %bb.0:
-; RV64I-NEXT: bgeu a0, a1, .LBB23_3
+; RV64I-NEXT: bgeu a0, a1, .LBB23_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: beq a2, a3, .LBB23_3
-; RV64I-NEXT: # %bb.2:
-; RV64I-NEXT: tail bar
-; RV64I-NEXT: .LBB23_3:
+; RV64I-NEXT: bne a2, a3, .LBB23_3
+; RV64I-NEXT: .LBB23_2:
; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB23_3:
+; RV64I-NEXT: tail bar
%5 = icmp uge i32 %0, %1
%6 = icmp eq i32 %2, %3
%7 = or i1 %5, %6
@@ -661,23 +661,23 @@ define void @or_uge_eq(i32 signext %0, i32 signext %1, i32 signext %2, i32 signe
define void @or_ule_eq(i32 signext %0, i32 signext %1, i32 signext %2, i32 signext %3) {
; RV32I-LABEL: or_ule_eq:
; RV32I: # %bb.0:
-; RV32I-NEXT: bgeu a1, a0, .LBB24_3
+; RV32I-NEXT: bgeu a1, a0, .LBB24_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: beq a2, a3, .LBB24_3
-; RV32I-NEXT: # %bb.2:
-; RV32I-NEXT: tail bar
-; RV32I-NEXT: .LBB24_3:
+; RV32I-NEXT: bne a2, a3, .LBB24_3
+; RV32I-NEXT: .LBB24_2:
; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB24_3:
+; RV32I-NEXT: tail bar
;
; RV64I-LABEL: or_ule_eq:
; RV64I: # %bb.0:
-; RV64I-NEXT: bgeu a1, a0, .LBB24_3
+; RV64I-NEXT: bgeu a1, a0, .LBB24_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: beq a2, a3, .LBB24_3
-; RV64I-NEXT: # %bb.2:
-; RV64I-NEXT: tail bar
-; RV64I-NEXT: .LBB24_3:
+; RV64I-NEXT: bne a2, a3, .LBB24_3
+; RV64I-NEXT: .LBB24_2:
; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB24_3:
+; RV64I-NEXT: tail bar
%5 = icmp ule i32 %0, %1
%6 = icmp eq i32 %2, %3
%7 = or i1 %5, %6
@@ -694,23 +694,23 @@ define void @or_ule_eq(i32 signext %0, i32 signext %1, i32 signext %2, i32 signe
define void @or_sge_ne(i32 signext %0, i32 signext %1, i32 signext %2, i32 signext %3) {
; RV32I-LABEL: or_sge_ne:
; RV32I: # %bb.0:
-; RV32I-NEXT: bge a0, a1, .LBB25_3
+; RV32I-NEXT: bge a0, a1, .LBB25_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: bne a2, a3, .LBB25_3
-; RV32I-NEXT: # %bb.2:
-; RV32I-NEXT: tail bar
-; RV32I-NEXT: .LBB25_3:
+; RV32I-NEXT: beq a2, a3, .LBB25_3
+; RV32I-NEXT: .LBB25_2:
; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB25_3:
+; RV32I-NEXT: tail bar
;
; RV64I-LABEL: or_sge_ne:
; RV64I: # %bb.0:
-; RV64I-NEXT: bge a0, a1, .LBB25_3
+; RV64I-NEXT: bge a0, a1, .LBB25_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: bne a2, a3, .LBB25_3
-; RV64I-NEXT: # %bb.2:
-; RV64I-NEXT: tail bar
-; RV64I-NEXT: .LBB25_3:
+; RV64I-NEXT: beq a2, a3, .LBB25_3
+; RV64I-NEXT: .LBB25_2:
; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB25_3:
+; RV64I-NEXT: tail bar
%5 = icmp sge i32 %0, %1
%6 = icmp ne i32 %2, %3
%7 = or i1 %5, %6
@@ -727,23 +727,23 @@ define void @or_sge_ne(i32 signext %0, i32 signext %1, i32 signext %2, i32 signe
define void @or_sle_ne(i32 signext %0, i32 signext %1, i32 signext %2, i32 signext %3) {
; RV32I-LABEL: or_sle_ne:
; RV32I: # %bb.0:
-; RV32I-NEXT: bge a1, a0, .LBB26_3
+; RV32I-NEXT: bge a1, a0, .LBB26_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: bne a2, a3, .LBB26_3
-; RV32I-NEXT: # %bb.2:
-; RV32I-NEXT: tail bar
-; RV32I-NEXT: .LBB26_3:
+; RV32I-NEXT: beq a2, a3, .LBB26_3
+; RV32I-NEXT: .LBB26_2:
; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB26_3:
+; RV32I-NEXT: tail bar
;
; RV64I-LABEL: or_sle_ne:
; RV64I: # %bb.0:
-; RV64I-NEXT: bge a1, a0, .LBB26_3
+; RV64I-NEXT: bge a1, a0, .LBB26_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: bne a2, a3, .LBB26_3
-; RV64I-NEXT: # %bb.2:
-; RV64I-NEXT: tail bar
-; RV64I-NEXT: .LBB26_3:
+; RV64I-NEXT: beq a2, a3, .LBB26_3
+; RV64I-NEXT: .LBB26_2:
; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB26_3:
+; RV64I-NEXT: tail bar
%5 = icmp sle i32 %0, %1
%6 = icmp ne i32 %2, %3
%7 = or i1 %5, %6
@@ -760,23 +760,23 @@ define void @or_sle_ne(i32 signext %0, i32 signext %1, i32 signext %2, i32 signe
define void @or_uge_ne(i32 signext %0, i32 signext %1, i32 signext %2, i32 signext %3) {
; RV32I-LABEL: or_uge_ne:
; RV32I: # %bb.0:
-; RV32I-NEXT: bgeu a0, a1, .LBB27_3
+; RV32I-NEXT: bgeu a0, a1, .LBB27_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: bne a2, a3, .LBB27_3
-; RV32I-NEXT: # %bb.2:
-; RV32I-NEXT: tail bar
-; RV32I-NEXT: .LBB27_3:
+; RV32I-NEXT: beq a2, a3, .LBB27_3
+; RV32I-NEXT: .LBB27_2:
; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB27_3:
+; RV32I-NEXT: tail bar
;
; RV64I-LABEL: or_uge_ne:
; RV64I: # %bb.0:
-; RV64I-NEXT: bgeu a0, a1, .LBB27_3
+; RV64I-NEXT: bgeu a0, a1, .LBB27_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: bne a2, a3, .LBB27_3
-; RV64I-NEXT: # %bb.2:
-; RV64I-NEXT: tail bar
-; RV64I-NEXT: .LBB27_3:
+; RV64I-NEXT: beq a2, a3, .LBB27_3
+; RV64I-NEXT: .LBB27_2:
; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB27_3:
+; RV64I-NEXT: tail bar
%5 = icmp uge i32 %0, %1
%6 = icmp ne i32 %2, %3
%7 = or i1 %5, %6
@@ -793,23 +793,23 @@ define void @or_uge_ne(i32 signext %0, i32 signext %1, i32 signext %2, i32 signe
define void @or_ule_ne(i32 signext %0, i32 signext %1, i32 signext %2, i32 signext %3) {
; RV32I-LABEL: or_ule_ne:
; RV32I: # %bb.0:
-; RV32I-NEXT: bgeu a1, a0, .LBB28_3
+; RV32I-NEXT: bgeu a1, a0, .LBB28_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: bne a2, a3, .LBB28_3
-; RV32I-NEXT: # %bb.2:
-; RV32I-NEXT: tail bar
-; RV32I-NEXT: .LBB28_3:
+; RV32I-NEXT: beq a2, a3, .LBB28_3
+; RV32I-NEXT: .LBB28_2:
; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB28_3:
+; RV32I-NEXT: tail bar
;
; RV64I-LABEL: or_ule_ne:
; RV64I: # %bb.0:
-; RV64I-NEXT: bgeu a1, a0, .LBB28_3
+; RV64I-NEXT: bgeu a1, a0, .LBB28_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: bne a2, a3, .LBB28_3
-; RV64I-NEXT: # %bb.2:
-; RV64I-NEXT: tail bar
-; RV64I-NEXT: .LBB28_3:
+; RV64I-NEXT: beq a2, a3, .LBB28_3
+; RV64I-NEXT: .LBB28_2:
; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB28_3:
+; RV64I-NEXT: tail bar
%5 = icmp ule i32 %0, %1
%6 = icmp ne i32 %2, %3
%7 = or i1 %5, %6
@@ -1156,23 +1156,23 @@ define void @and_sle_lt1(i32 signext %0, i32 signext %1, i32 signext %2) {
define void @or_uge_gt0(i32 signext %0, i32 signext %1, i32 signext %2) {
; RV32I-LABEL: or_uge_gt0:
; RV32I: # %bb.0:
-; RV32I-NEXT: bgeu a0, a1, .LBB39_3
+; RV32I-NEXT: bgeu a0, a1, .LBB39_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: bgtz a2, .LBB39_3
-; RV32I-NEXT: # %bb.2:
-; RV32I-NEXT: tail bar
-; RV32I-NEXT: .LBB39_3:
+; RV32I-NEXT: blez a2, .LBB39_3
+; RV32I-NEXT: .LBB39_2:
; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB39_3:
+; RV32I-NEXT: tail bar
;
; RV64I-LABEL: or_uge_gt0:
; RV64I: # %bb.0:
-; RV64I-NEXT: bgeu a0, a1, .LBB39_3
+; RV64I-NEXT: bgeu a0, a1, .LBB39_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: bgtz a2, .LBB39_3
-; RV64I-NEXT: # %bb.2:
-; RV64I-NEXT: tail bar
-; RV64I-NEXT: .LBB39_3:
+; RV64I-NEXT: blez a2, .LBB39_3
+; RV64I-NEXT: .LBB39_2:
; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB39_3:
+; RV64I-NEXT: tail bar
%4 = icmp uge i32 %0, %1
%5 = icmp sgt i32 %2, 0
%6 = or i1 %4, %5
@@ -1189,23 +1189,23 @@ define void @or_uge_gt0(i32 signext %0, i32 signext %1, i32 signext %2) {
define void @or_ule_lt1(i32 signext %0, i32 signext %1, i32 signext %2) {
; RV32I-LABEL: or_ule_lt1:
; RV32I: # %bb.0:
-; RV32I-NEXT: bgeu a1, a0, .LBB40_3
+; RV32I-NEXT: bgeu a1, a0, .LBB40_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: blez a2, .LBB40_3
-; RV32I-NEXT: # %bb.2:
-; RV32I-NEXT: tail bar
-; RV32I-NEXT: .LBB40_3:
+; RV32I-NEXT: bgtz a2, .LBB40_3
+; RV32I-NEXT: .LBB40_2:
; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB40_3:
+; RV32I-NEXT: tail bar
;
; RV64I-LABEL: or_ule_lt1:
; RV64I: # %bb.0:
-; RV64I-NEXT: bgeu a1, a0, .LBB40_3
+; RV64I-NEXT: bgeu a1, a0, .LBB40_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: blez a2, .LBB40_3
-; RV64I-NEXT: # %bb.2:
-; RV64I-NEXT: tail bar
-; RV64I-NEXT: .LBB40_3:
+; RV64I-NEXT: bgtz a2, .LBB40_3
+; RV64I-NEXT: .LBB40_2:
; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB40_3:
+; RV64I-NEXT: tail bar
%4 = icmp ule i32 %0, %1
%5 = icmp slt i32 %2, 1
%6 = or i1 %4, %5
diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
index 87f2a6306bd60..0a0870e9fd7ed 100644
--- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
+++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
@@ -754,20 +754,20 @@ define i64 @dec_of_zexted_cmp_i64(i64 %x) {
define void @zext_nneg_dominating_icmp_i64(i16 signext %0) {
; RV32I-LABEL: zext_nneg_dominating_icmp_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: bltz a0, .LBB46_2
+; RV32I-NEXT: bgez a0, .LBB46_2
; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB46_2:
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: tail bar_i64
-; RV32I-NEXT: .LBB46_2:
-; RV32I-NEXT: ret
;
; RV64-LABEL: zext_nneg_dominating_icmp_i64:
; RV64: # %bb.0:
-; RV64-NEXT: bltz a0, .LBB46_2
+; RV64-NEXT: bgez a0, .LBB46_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar_i64
-; RV64-NEXT: .LBB46_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB46_2:
+; RV64-NEXT: tail bar_i64
%2 = icmp sgt i16 %0, -1
br i1 %2, label %3, label %5
@@ -785,19 +785,19 @@ declare void @bar_i64(i64)
define void @zext_nneg_dominating_icmp_i32(i16 signext %0) {
; RV32I-LABEL: zext_nneg_dominating_icmp_i32:
; RV32I: # %bb.0:
-; RV32I-NEXT: bltz a0, .LBB47_2
+; RV32I-NEXT: bgez a0, .LBB47_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: tail bar_i32
-; RV32I-NEXT: .LBB47_2:
; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB47_2:
+; RV32I-NEXT: tail bar_i32
;
; RV64-LABEL: zext_nneg_dominating_icmp_i32:
; RV64: # %bb.0:
-; RV64-NEXT: bltz a0, .LBB47_2
+; RV64-NEXT: bgez a0, .LBB47_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar_i32
-; RV64-NEXT: .LBB47_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB47_2:
+; RV64-NEXT: tail bar_i32
%2 = icmp sgt i16 %0, -1
br i1 %2, label %3, label %5
@@ -817,19 +817,19 @@ declare void @bar_i32(i32)
define void @zext_nneg_dominating_icmp_i32_signext(i16 signext %0) {
; RV32I-LABEL: zext_nneg_dominating_icmp_i32_signext:
; RV32I: # %bb.0:
-; RV32I-NEXT: bltz a0, .LBB48_2
+; RV32I-NEXT: bgez a0, .LBB48_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: tail bar_i32
-; RV32I-NEXT: .LBB48_2:
; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB48_2:
+; RV32I-NEXT: tail bar_i32
;
; RV64-LABEL: zext_nneg_dominating_icmp_i32_signext:
; RV64: # %bb.0:
-; RV64-NEXT: bltz a0, .LBB48_2
+; RV64-NEXT: bgez a0, .LBB48_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar_i32
-; RV64-NEXT: .LBB48_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB48_2:
+; RV64-NEXT: tail bar_i32
%2 = icmp sgt i16 %0, -1
br i1 %2, label %3, label %5
@@ -847,19 +847,19 @@ define void @zext_nneg_dominating_icmp_i32_signext(i16 signext %0) {
define void @zext_nneg_dominating_icmp_i32_zeroext(i16 signext %0) {
; RV32I-LABEL: zext_nneg_dominating_icmp_i32_zeroext:
; RV32I: # %bb.0:
-; RV32I-NEXT: bltz a0, .LBB49_2
+; RV32I-NEXT: bgez a0, .LBB49_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: tail bar_i32
-; RV32I-NEXT: .LBB49_2:
; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB49_2:
+; RV32I-NEXT: tail bar_i32
;
; RV64-LABEL: zext_nneg_dominating_icmp_i32_zeroext:
; RV64: # %bb.0:
-; RV64-NEXT: bltz a0, .LBB49_2
+; RV64-NEXT: bgez a0, .LBB49_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: tail bar_i32
-; RV64-NEXT: .LBB49_2:
; RV64-NEXT: ret
+; RV64-NEXT: .LBB49_2:
+; RV64-NEXT: tail bar_i32
%2 = icmp sgt i16 %0, -1
br i1 %2, label %3, label %5
@@ -883,8 +883,13 @@ define void @load_zext_nneg_sext_cse(ptr %p) nounwind {
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: lh s0, 0(a0)
-; RV32I-NEXT: bltz s0, .LBB50_2
-; RV32I-NEXT: # %bb.1: # %bb1
+; RV32I-NEXT: bgez s0, .LBB50_2
+; RV32I-NEXT: # %bb.1: # %bb2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB50_2: # %bb1
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call bar_i16
; RV32I-NEXT: mv a0, s0
@@ -892,11 +897,6 @@ define void @load_zext_nneg_sext_cse(ptr %p) nounwind {
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: tail bar_i32
-; RV32I-NEXT: .LBB50_2: # %bb2
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
;
; RV64-LABEL: load_zext_nneg_sext_cse:
; RV64: # %bb.0:
@@ -904,8 +904,13 @@ define void @load_zext_nneg_sext_cse(ptr %p) nounwind {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64-NEXT: lh s0, 0(a0)
-; RV64-NEXT: bltz s0, .LBB50_2
-; RV64-NEXT: # %bb.1: # %bb1
+; RV64-NEXT: bgez s0, .LBB50_2
+; RV64-NEXT: # %bb.1: # %bb2
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB50_2: # %bb1
; RV64-NEXT: mv a0, s0
; RV64-NEXT: call bar_i16
; RV64-NEXT: mv a0, s0
@@ -913,11 +918,6 @@ define void @load_zext_nneg_sext_cse(ptr %p) nounwind {
; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: tail bar_i32
-; RV64-NEXT: .LBB50_2: # %bb2
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
-; RV64-NEXT: ret
%load = load i16, ptr %p
%zext = zext nneg i16 %load to i32
%cmp = icmp sgt i16 %load, -1
diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
index 249dabba0cc28..24aebefbda2b8 100644
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -414,7 +414,7 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind {
; RV32I-NEXT: bnez t2, .LBB10_14
; RV32I-NEXT: .LBB10_4:
; RV32I-NEXT: beqz a6, .LBB10_6
-; RV32I-NEXT: .LBB10_5:
+; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: mv t0, a4
; RV32I-NEXT: .LBB10_6:
; RV32I-NEXT: slli t3, t0, 1
@@ -456,8 +456,7 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind {
; RV32I-NEXT: beqz t2, .LBB10_4
; RV32I-NEXT: .LBB10_14:
; RV32I-NEXT: mv a4, t1
-; RV32I-NEXT: bnez a6, .LBB10_5
-; RV32I-NEXT: j .LBB10_6
+; RV32I-NEXT: j .LBB10_4
;
; RV64I-LABEL: fshr128_minsize:
; RV64I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/simplify-condbr.ll b/llvm/test/CodeGen/RISCV/simplify-condbr.ll
index 3f9a73607103a..8999d814bee9d 100644
--- a/llvm/test/CodeGen/RISCV/simplify-condbr.ll
+++ b/llvm/test/CodeGen/RISCV/simplify-condbr.ll
@@ -30,12 +30,12 @@ define fastcc i32 @S_regrepeat(ptr %startposp, i32 %max, i8 %0, i1 %cmp343) noun
; CHECK-NEXT: bltu a1, a2, .LBB0_8
; CHECK-NEXT: # %bb.2: # %do_exactf
; CHECK-NEXT: andi a3, a3, 1
-; CHECK-NEXT: beqz a3, .LBB0_10
+; CHECK-NEXT: beqz a3, .LBB0_8
; CHECK-NEXT: # %bb.3: # %land.rhs251
; CHECK-NEXT: lw zero, 0(zero)
; CHECK-NEXT: li s0, 1
-; CHECK-NEXT: bnez s0, .LBB0_9
-; CHECK-NEXT: j .LBB0_8
+; CHECK-NEXT: beqz s0, .LBB0_8
+; CHECK-NEXT: j .LBB0_9
; CHECK-NEXT: .LBB0_4: # %sw.bb336
; CHECK-NEXT: mv s1, a0
; CHECK-NEXT: li s0, 0
@@ -62,8 +62,6 @@ define fastcc i32 @S_regrepeat(ptr %startposp, i32 %max, i8 %0, i1 %cmp343) noun
; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB0_10:
-; CHECK-NEXT: j .LBB0_8
entry:
switch i8 %0, label %if.else1492 [
i8 19, label %sw.bb336
@@ -113,32 +111,29 @@ define ptr @Perl_pp_refassign(ptr %PL_stack_sp, i1 %tobool.not, i1 %tobool3.not,
; CHECK-LABEL: Perl_pp_refassign:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: beqz a1, .LBB1_3
+; CHECK-NEXT: beqz a1, .LBB1_6
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: andi a2, a2, 1
-; CHECK-NEXT: bnez a2, .LBB1_4
-; CHECK-NEXT: .LBB1_2: # %cond.true4
+; CHECK-NEXT: bnez a2, .LBB1_5
+; CHECK-NEXT: # %bb.3: # %cond.true4
; CHECK-NEXT: ld a0, 0(a0)
; CHECK-NEXT: snez a0, a0
-; CHECK-NEXT: bnez a0, .LBB1_5
-; CHECK-NEXT: j .LBB1_6
-; CHECK-NEXT: .LBB1_3: # %cond.true
-; CHECK-NEXT: ld a1, 0(a0)
-; CHECK-NEXT: andi a2, a2, 1
-; CHECK-NEXT: beqz a2, .LBB1_2
-; CHECK-NEXT: .LBB1_4:
-; CHECK-NEXT: j .LBB1_6
-; CHECK-NEXT: .LBB1_5: # %sw.bb85
+; CHECK-NEXT: beqz a0, .LBB1_5
+; CHECK-NEXT: # %bb.4: # %sw.bb85
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: ld a0, 0(a1)
; CHECK-NEXT: call Perl_av_store
; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .LBB1_6: # %common.ret
+; CHECK-NEXT: .LBB1_5: # %common.ret
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB1_6: # %cond.true
+; CHECK-NEXT: ld a1, 0(a0)
+; CHECK-NEXT: j .LBB1_2
entry:
br i1 %tobool.not, label %cond.end, label %cond.true
diff --git a/llvm/test/CodeGen/RISCV/ssub_sat.ll b/llvm/test/CodeGen/RISCV/ssub_sat.ll
index cc5cd76e913c6..31afdd82bd41e 100644
--- a/llvm/test/CodeGen/RISCV/ssub_sat.ll
+++ b/llvm/test/CodeGen/RISCV/ssub_sat.ll
@@ -96,15 +96,14 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
; RV32I-NEXT: lui a1, 8
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: bge a0, a1, .LBB2_3
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: .LBB2_1:
; RV32I-NEXT: lui a1, 1048568
; RV32I-NEXT: bge a1, a0, .LBB2_4
-; RV32I-NEXT: .LBB2_2:
+; RV32I-NEXT: # %bb.2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB2_3:
; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: lui a1, 1048568
-; RV32I-NEXT: blt a1, a0, .LBB2_2
+; RV32I-NEXT: j .LBB2_1
; RV32I-NEXT: .LBB2_4:
; RV32I-NEXT: lui a0, 1048568
; RV32I-NEXT: ret
@@ -115,15 +114,14 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
; RV64I-NEXT: lui a1, 8
; RV64I-NEXT: addiw a1, a1, -1
; RV64I-NEXT: bge a0, a1, .LBB2_3
-; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: .LBB2_1:
; RV64I-NEXT: lui a1, 1048568
; RV64I-NEXT: bge a1, a0, .LBB2_4
-; RV64I-NEXT: .LBB2_2:
+; RV64I-NEXT: # %bb.2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB2_3:
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: lui a1, 1048568
-; RV64I-NEXT: blt a1, a0, .LBB2_2
+; RV64I-NEXT: j .LBB2_1
; RV64I-NEXT: .LBB2_4:
; RV64I-NEXT: lui a0, 1048568
; RV64I-NEXT: ret
@@ -157,15 +155,14 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: li a1, 127
; RV32I-NEXT: bge a0, a1, .LBB3_3
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: .LBB3_1:
; RV32I-NEXT: li a1, -128
; RV32I-NEXT: bge a1, a0, .LBB3_4
-; RV32I-NEXT: .LBB3_2:
+; RV32I-NEXT: # %bb.2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB3_3:
; RV32I-NEXT: li a0, 127
-; RV32I-NEXT: li a1, -128
-; RV32I-NEXT: blt a1, a0, .LBB3_2
+; RV32I-NEXT: j .LBB3_1
; RV32I-NEXT: .LBB3_4:
; RV32I-NEXT: li a0, -128
; RV32I-NEXT: ret
@@ -175,15 +172,14 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: li a1, 127
; RV64I-NEXT: bge a0, a1, .LBB3_3
-; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: .LBB3_1:
; RV64I-NEXT: li a1, -128
; RV64I-NEXT: bge a1, a0, .LBB3_4
-; RV64I-NEXT: .LBB3_2:
+; RV64I-NEXT: # %bb.2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB3_3:
; RV64I-NEXT: li a0, 127
-; RV64I-NEXT: li a1, -128
-; RV64I-NEXT: blt a1, a0, .LBB3_2
+; RV64I-NEXT: j .LBB3_1
; RV64I-NEXT: .LBB3_4:
; RV64I-NEXT: li a0, -128
; RV64I-NEXT: ret
@@ -215,15 +211,14 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: li a1, 7
; RV32I-NEXT: bge a0, a1, .LBB4_3
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: .LBB4_1:
; RV32I-NEXT: li a1, -8
; RV32I-NEXT: bge a1, a0, .LBB4_4
-; RV32I-NEXT: .LBB4_2:
+; RV32I-NEXT: # %bb.2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB4_3:
; RV32I-NEXT: li a0, 7
-; RV32I-NEXT: li a1, -8
-; RV32I-NEXT: blt a1, a0, .LBB4_2
+; RV32I-NEXT: j .LBB4_1
; RV32I-NEXT: .LBB4_4:
; RV32I-NEXT: li a0, -8
; RV32I-NEXT: ret
@@ -233,15 +228,14 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: li a1, 7
; RV64I-NEXT: bge a0, a1, .LBB4_3
-; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: .LBB4_1:
; RV64I-NEXT: li a1, -8
; RV64I-NEXT: bge a1, a0, .LBB4_4
-; RV64I-NEXT: .LBB4_2:
+; RV64I-NEXT: # %bb.2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB4_3:
; RV64I-NEXT: li a0, 7
-; RV64I-NEXT: li a1, -8
-; RV64I-NEXT: blt a1, a0, .LBB4_2
+; RV64I-NEXT: j .LBB4_1
; RV64I-NEXT: .LBB4_4:
; RV64I-NEXT: li a0, -8
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll
index 0499992b71778..8c341922b887c 100644
--- a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll
+++ b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll
@@ -106,15 +106,14 @@ define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind {
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: addi a1, a2, -1
; RV32I-NEXT: bge a0, a1, .LBB2_3
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: .LBB2_1:
; RV32I-NEXT: lui a1, 1048568
; RV32I-NEXT: bge a1, a0, .LBB2_4
-; RV32I-NEXT: .LBB2_2:
+; RV32I-NEXT: # %bb.2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB2_3:
; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: lui a1, 1048568
-; RV32I-NEXT: blt a1, a0, .LBB2_2
+; RV32I-NEXT: j .LBB2_1
; RV32I-NEXT: .LBB2_4:
; RV32I-NEXT: lui a0, 1048568
; RV32I-NEXT: ret
@@ -130,15 +129,14 @@ define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind {
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: addiw a1, a2, -1
; RV64I-NEXT: bge a0, a1, .LBB2_3
-; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: .LBB2_1:
; RV64I-NEXT: lui a1, 1048568
; RV64I-NEXT: bge a1, a0, .LBB2_4
-; RV64I-NEXT: .LBB2_2:
+; RV64I-NEXT: # %bb.2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB2_3:
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: lui a1, 1048568
-; RV64I-NEXT: blt a1, a0, .LBB2_2
+; RV64I-NEXT: j .LBB2_1
; RV64I-NEXT: .LBB2_4:
; RV64I-NEXT: lui a0, 1048568
; RV64I-NEXT: ret
@@ -184,15 +182,14 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: li a1, 127
; RV32I-NEXT: bge a0, a1, .LBB3_3
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: .LBB3_1:
; RV32I-NEXT: li a1, -128
; RV32I-NEXT: bge a1, a0, .LBB3_4
-; RV32I-NEXT: .LBB3_2:
+; RV32I-NEXT: # %bb.2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB3_3:
; RV32I-NEXT: li a0, 127
-; RV32I-NEXT: li a1, -128
-; RV32I-NEXT: blt a1, a0, .LBB3_2
+; RV32I-NEXT: j .LBB3_1
; RV32I-NEXT: .LBB3_4:
; RV32I-NEXT: li a0, -128
; RV32I-NEXT: ret
@@ -207,15 +204,14 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: li a1, 127
; RV64I-NEXT: bge a0, a1, .LBB3_3
-; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: .LBB3_1:
; RV64I-NEXT: li a1, -128
; RV64I-NEXT: bge a1, a0, .LBB3_4
-; RV64I-NEXT: .LBB3_2:
+; RV64I-NEXT: # %bb.2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB3_3:
; RV64I-NEXT: li a0, 127
-; RV64I-NEXT: li a1, -128
-; RV64I-NEXT: blt a1, a0, .LBB3_2
+; RV64I-NEXT: j .LBB3_1
; RV64I-NEXT: .LBB3_4:
; RV64I-NEXT: li a0, -128
; RV64I-NEXT: ret
@@ -259,15 +255,14 @@ define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: li a1, 7
; RV32I-NEXT: bge a0, a1, .LBB4_3
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: .LBB4_1:
; RV32I-NEXT: li a1, -8
; RV32I-NEXT: bge a1, a0, .LBB4_4
-; RV32I-NEXT: .LBB4_2:
+; RV32I-NEXT: # %bb.2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB4_3:
; RV32I-NEXT: li a0, 7
-; RV32I-NEXT: li a1, -8
-; RV32I-NEXT: blt a1, a0, .LBB4_2
+; RV32I-NEXT: j .LBB4_1
; RV32I-NEXT: .LBB4_4:
; RV32I-NEXT: li a0, -8
; RV32I-NEXT: ret
@@ -282,15 +277,14 @@ define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: li a1, 7
; RV64I-NEXT: bge a0, a1, .LBB4_3
-; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: .LBB4_1:
; RV64I-NEXT: li a1, -8
; RV64I-NEXT: bge a1, a0, .LBB4_4
-; RV64I-NEXT: .LBB4_2:
+; RV64I-NEXT: # %bb.2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB4_3:
; RV64I-NEXT: li a0, 7
-; RV64I-NEXT: li a1, -8
-; RV64I-NEXT: blt a1, a0, .LBB4_2
+; RV64I-NEXT: j .LBB4_1
; RV64I-NEXT: .LBB4_4:
; RV64I-NEXT: li a0, -8
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/xcvbi.ll b/llvm/test/CodeGen/RISCV/xcvbi.ll
index ca2e416e334f0..67ee49e371c6b 100644
--- a/llvm/test/CodeGen/RISCV/xcvbi.ll
+++ b/llvm/test/CodeGen/RISCV/xcvbi.ll
@@ -8,8 +8,7 @@ define i32 @beqimm(i32 %a) {
; CHECK_NOPT-LABEL: beqimm:
; CHECK_NOPT: # %bb.0:
; CHECK_NOPT-NEXT: cv.beqimm a0, 5, .LBB0_2
-; CHECK_NOPT-NEXT: j .LBB0_1
-; CHECK_NOPT-NEXT: .LBB0_1: # %f
+; CHECK_NOPT-NEXT: # %bb.1: # %f
; CHECK_NOPT-NEXT: li a0, 0
; CHECK_NOPT-NEXT: ret
; CHECK_NOPT-NEXT: .LBB0_2: # %t
@@ -37,8 +36,7 @@ define i32 @bneimm(i32 %a) {
; CHECK_NOPT-LABEL: bneimm:
; CHECK_NOPT: # %bb.0:
; CHECK_NOPT-NEXT: cv.bneimm a0, 5, .LBB1_2
-; CHECK_NOPT-NEXT: j .LBB1_1
-; CHECK_NOPT-NEXT: .LBB1_1: # %f
+; CHECK_NOPT-NEXT: # %bb.1: # %f
; CHECK_NOPT-NEXT: li a0, 0
; CHECK_NOPT-NEXT: ret
; CHECK_NOPT-NEXT: .LBB1_2: # %t
From 47c9e17fcacb7b2fe6083da492a62cce6bb6b64c Mon Sep 17 00:00:00 2001
From: "Mikhail R. Gadelha" <mikhail at igalia.com>
Date: Mon, 7 Apr 2025 20:05:02 -0300
Subject: [PATCH 2/6] Removed note added by update_llc_test_checks.py
Signed-off-by: Mikhail R. Gadelha <mikhail at igalia.com>
---
llvm/test/CodeGen/RISCV/O0-pipeline.ll | 1 -
llvm/test/CodeGen/RISCV/O3-pipeline.ll | 1 -
2 files changed, 2 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index 71431c452233f..2a15b64c8651a 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 -O0 -debug-pass=Structure < %s -o /dev/null 2>&1 | \
; RUN: grep -v "Verify generated machine code" | \
; RUN: FileCheck %s --check-prefixes=CHECK
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 5b34e9defcdb8..80fd4d8ba57fc 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 -O3 -debug-pass=Structure < %s -o /dev/null 2>&1 | \
; RUN: grep -v "Verify generated machine code" | \
; RUN: FileCheck %s --check-prefixes=CHECK
From 515ce5b5fac72d279c2db8716484663e81807719 Mon Sep 17 00:00:00 2001
From: "Mikhail R. Gadelha" <mikhail at igalia.com>
Date: Mon, 7 Apr 2025 21:25:20 -0300
Subject: [PATCH 3/6] Updated tests
Signed-off-by: Mikhail R. Gadelha <mikhail at igalia.com>
---
.../RISCV/rvv/fixed-vectors-masked-gather.ll | 3374 +++++--------
.../RISCV/rvv/fixed-vectors-masked-scatter.ll | 4283 ++++++++---------
.../RISCV/lsr-drop-solution.ll | 20 +-
3 files changed, 3090 insertions(+), 4587 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index d1f31a6c59083..ee9ec98d387c2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -721,18 +721,19 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8
; RV64ZVE32F-NEXT: .LBB12_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB12_14
-; RV64ZVE32F-NEXT: .LBB12_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB12_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v11, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
+; RV64ZVE32F-NEXT: .LBB12_6: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB12_15
-; RV64ZVE32F-NEXT: .LBB12_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB12_16
-; RV64ZVE32F-NEXT: .LBB12_7: # %else11
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB12_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB12_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -793,32 +794,6 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB12_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v11, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-NEXT: j .LBB12_5
-; RV64ZVE32F-NEXT: .LBB12_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-NEXT: j .LBB12_6
-; RV64ZVE32F-NEXT: .LBB12_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, mf2, tu, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-NEXT: j .LBB12_7
%ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
%v = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> %m, <8 x i8> %passthru)
ret <8 x i8> %v
@@ -1429,21 +1404,10 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: .LBB23_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB23_14
-; RV64ZVE32F-NEXT: .LBB23_5: # %else5
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB23_15
-; RV64ZVE32F-NEXT: .LBB23_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB23_16
-; RV64ZVE32F-NEXT: .LBB23_7: # %else11
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB23_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB23_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
@@ -1525,39 +1489,6 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB23_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v11, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-NEXT: j .LBB23_5
-; RV64ZVE32F-NEXT: .LBB23_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-NEXT: j .LBB23_6
-; RV64ZVE32F-NEXT: .LBB23_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-NEXT: j .LBB23_7
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
ret <8 x i16> %v
@@ -1614,21 +1545,10 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: .LBB24_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB24_14
-; RV64ZVE32F-NEXT: .LBB24_5: # %else5
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB24_15
-; RV64ZVE32F-NEXT: .LBB24_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB24_16
-; RV64ZVE32F-NEXT: .LBB24_7: # %else11
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB24_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB24_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
@@ -1710,39 +1630,6 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB24_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v11, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-NEXT: j .LBB24_5
-; RV64ZVE32F-NEXT: .LBB24_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-NEXT: j .LBB24_6
-; RV64ZVE32F-NEXT: .LBB24_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-NEXT: j .LBB24_7
%eidxs = sext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
@@ -1800,37 +1687,71 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: .LBB25_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB25_14
-; RV64ZVE32F-NEXT: .LBB25_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB25_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v11, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
+; RV64ZVE32F-NEXT: .LBB25_6: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB25_15
-; RV64ZVE32F-NEXT: .LBB25_6: # %else8
+; RV64ZVE32F-NEXT: beqz a2, .LBB25_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3
+; RV64ZVE32F-NEXT: .LBB25_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB25_16
-; RV64ZVE32F-NEXT: .LBB25_7: # %else11
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB25_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.load10
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 4
+; RV64ZVE32F-NEXT: .LBB25_10: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB25_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB25_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: andi a2, a2, 255
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: zext.b a2, a2
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
-; RV64ZVE32F-NEXT: .LBB25_9: # %else14
+; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 5
+; RV64ZVE32F-NEXT: .LBB25_12: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB25_11
-; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB25_14
+; RV64ZVE32F-NEXT: # %bb.13: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: andi a2, a2, 255
+; RV64ZVE32F-NEXT: zext.b a2, a2
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
@@ -1838,14 +1759,14 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
-; RV64ZVE32F-NEXT: .LBB25_11: # %else17
+; RV64ZVE32F-NEXT: .LBB25_14: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB25_13
-; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
+; RV64ZVE32F-NEXT: beqz a1, .LBB25_16
+; RV64ZVE32F-NEXT: # %bb.15: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
-; RV64ZVE32F-NEXT: andi a1, a1, 255
+; RV64ZVE32F-NEXT: zext.b a1, a1
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lh a0, 0(a0)
@@ -1853,46 +1774,10 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
-; RV64ZVE32F-NEXT: .LBB25_13: # %else20
+; RV64ZVE32F-NEXT: .LBB25_16: # %else20
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB25_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: andi a2, a2, 255
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v11, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-NEXT: j .LBB25_5
-; RV64ZVE32F-NEXT: .LBB25_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: zext.b a2, a2
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v10, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-NEXT: j .LBB25_6
-; RV64ZVE32F-NEXT: .LBB25_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: andi a2, a2, 255
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v10, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-NEXT: j .LBB25_7
%eidxs = zext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
@@ -1947,18 +1832,20 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: .LBB26_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB26_14
-; RV64ZVE32F-NEXT: .LBB26_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB26_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v11, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
+; RV64ZVE32F-NEXT: .LBB26_6: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB26_15
-; RV64ZVE32F-NEXT: .LBB26_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB26_16
-; RV64ZVE32F-NEXT: .LBB26_7: # %else11
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB26_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB26_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -2024,35 +1911,6 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB26_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v11, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-NEXT: j .LBB26_5
-; RV64ZVE32F-NEXT: .LBB26_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-NEXT: j .LBB26_6
-; RV64ZVE32F-NEXT: .LBB26_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-NEXT: j .LBB26_7
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
ret <8 x i16> %v
@@ -2551,18 +2409,21 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: .LBB35_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB35_14
-; RV64ZVE32F-NEXT: .LBB35_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB35_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
+; RV64ZVE32F-NEXT: .LBB35_6: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB35_15
-; RV64ZVE32F-NEXT: .LBB35_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB35_16
-; RV64ZVE32F-NEXT: .LBB35_7: # %else11
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB35_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB35_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -2633,39 +2494,6 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB35_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: j .LBB35_5
-; RV64ZVE32F-NEXT: .LBB35_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: j .LBB35_6
-; RV64ZVE32F-NEXT: .LBB35_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: j .LBB35_7
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
@@ -2721,18 +2549,21 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: .LBB36_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB36_14
-; RV64ZVE32F-NEXT: .LBB36_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB36_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
+; RV64ZVE32F-NEXT: .LBB36_6: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB36_15
-; RV64ZVE32F-NEXT: .LBB36_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB36_16
-; RV64ZVE32F-NEXT: .LBB36_7: # %else11
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB36_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB36_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -2803,39 +2634,6 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB36_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: j .LBB36_5
-; RV64ZVE32F-NEXT: .LBB36_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: j .LBB36_6
-; RV64ZVE32F-NEXT: .LBB36_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: j .LBB36_7
%eidxs = sext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
@@ -2895,22 +2693,56 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: .LBB37_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB37_14
-; RV64ZVE32F-NEXT: .LBB37_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB37_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
+; RV64ZVE32F-NEXT: .LBB37_6: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB37_15
-; RV64ZVE32F-NEXT: .LBB37_6: # %else8
+; RV64ZVE32F-NEXT: beqz a2, .LBB37_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 3
+; RV64ZVE32F-NEXT: .LBB37_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB37_16
-; RV64ZVE32F-NEXT: .LBB37_7: # %else11
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB37_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.load10
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
+; RV64ZVE32F-NEXT: .LBB37_10: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB37_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB37_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: andi a2, a2, 255
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: zext.b a2, a2
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
@@ -2918,14 +2750,14 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
-; RV64ZVE32F-NEXT: .LBB37_9: # %else14
+; RV64ZVE32F-NEXT: .LBB37_12: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB37_11
-; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB37_14
+; RV64ZVE32F-NEXT: # %bb.13: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: andi a2, a2, 255
+; RV64ZVE32F-NEXT: zext.b a2, a2
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
@@ -2933,14 +2765,14 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
-; RV64ZVE32F-NEXT: .LBB37_11: # %else17
+; RV64ZVE32F-NEXT: .LBB37_14: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB37_13
-; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
+; RV64ZVE32F-NEXT: beqz a1, .LBB37_16
+; RV64ZVE32F-NEXT: # %bb.15: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
-; RV64ZVE32F-NEXT: andi a1, a1, 255
+; RV64ZVE32F-NEXT: zext.b a1, a1
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
@@ -2948,46 +2780,10 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
-; RV64ZVE32F-NEXT: .LBB37_13: # %else20
+; RV64ZVE32F-NEXT: .LBB37_16: # %else20
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB37_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: andi a2, a2, 255
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: j .LBB37_5
-; RV64ZVE32F-NEXT: .LBB37_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: zext.b a2, a2
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: j .LBB37_6
-; RV64ZVE32F-NEXT: .LBB37_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: andi a2, a2, 255
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: j .LBB37_7
%eidxs = zext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
@@ -3046,18 +2842,21 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: .LBB38_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB38_14
-; RV64ZVE32F-NEXT: .LBB38_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB38_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
+; RV64ZVE32F-NEXT: .LBB38_6: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB38_15
-; RV64ZVE32F-NEXT: .LBB38_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB38_16
-; RV64ZVE32F-NEXT: .LBB38_7: # %else11
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB38_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB38_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -3128,39 +2927,6 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB38_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: j .LBB38_5
-; RV64ZVE32F-NEXT: .LBB38_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: j .LBB38_6
-; RV64ZVE32F-NEXT: .LBB38_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: j .LBB38_7
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
@@ -3218,18 +2984,21 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: .LBB39_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB39_14
-; RV64ZVE32F-NEXT: .LBB39_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB39_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
+; RV64ZVE32F-NEXT: .LBB39_6: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB39_15
-; RV64ZVE32F-NEXT: .LBB39_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB39_16
-; RV64ZVE32F-NEXT: .LBB39_7: # %else11
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB39_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB39_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -3300,39 +3069,6 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB39_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: j .LBB39_5
-; RV64ZVE32F-NEXT: .LBB39_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: j .LBB39_6
-; RV64ZVE32F-NEXT: .LBB39_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: j .LBB39_7
%eidxs = sext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
@@ -3393,18 +3129,22 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: .LBB40_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB40_14
-; RV64ZVE32F-NEXT: .LBB40_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB40_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
+; RV64ZVE32F-NEXT: .LBB40_6: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB40_15
-; RV64ZVE32F-NEXT: .LBB40_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB40_16
-; RV64ZVE32F-NEXT: .LBB40_7: # %else11
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB40_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB40_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -3480,42 +3220,6 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB40_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 48
-; RV64ZVE32F-NEXT: srli a2, a2, 46
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: j .LBB40_5
-; RV64ZVE32F-NEXT: .LBB40_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 48
-; RV64ZVE32F-NEXT: srli a2, a2, 46
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: j .LBB40_6
-; RV64ZVE32F-NEXT: .LBB40_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: slli a2, a2, 48
-; RV64ZVE32F-NEXT: srli a2, a2, 46
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: j .LBB40_7
%eidxs = zext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
@@ -3568,21 +3272,10 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: .LBB41_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB41_14
-; RV64ZVE32F-NEXT: .LBB41_5: # %else5
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB41_15
-; RV64ZVE32F-NEXT: .LBB41_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB41_16
-; RV64ZVE32F-NEXT: .LBB41_7: # %else11
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB41_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB41_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
@@ -3594,13 +3287,13 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB41_15
-; RV64ZVE32F-NEXT: # %bb.7: # %else8
+; RV64ZVE32F-NEXT: .LBB41_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB41_16
; RV64ZVE32F-NEXT: .LBB41_8: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB41_10
-; RV64ZVE32F-NEXT: .LBB41_9: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -3648,28 +3341,17 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2
-; RV64ZVE32F-NEXT: j .LBB41_5
-; RV64ZVE32F-NEXT: .LBB41_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: j .LBB41_6
+; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 3
+; RV64ZVE32F-NEXT: j .LBB41_7
; RV64ZVE32F-NEXT: .LBB41_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
-; RV64ZVE32F-NEXT: j .LBB41_7
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
+; RV64ZVE32F-NEXT: j .LBB41_8
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
@@ -4464,58 +4146,60 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB48_6: # %else2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB48_14
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a6, .LBB48_10
; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a6, v8
+; RV64ZVE32F-NEXT: vmv.x.s a6, v9
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: .LBB48_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB48_15
+; RV64ZVE32F-NEXT: bnez a7, .LBB48_11
; RV64ZVE32F-NEXT: # %bb.9:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: j .LBB48_12
; RV64ZVE32F-NEXT: .LBB48_10:
-; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB48_16
-; RV64ZVE32F-NEXT: # %bb.11:
-; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: .LBB48_12:
-; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB48_17
-; RV64ZVE32F-NEXT: # %bb.13:
-; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB48_18
-; RV64ZVE32F-NEXT: .LBB48_14:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: j .LBB48_8
-; RV64ZVE32F-NEXT: .LBB48_15: # %cond.load7
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a7, v8
+; RV64ZVE32F-NEXT: .LBB48_11: # %cond.load7
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a7, v9
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: j .LBB48_10
-; RV64ZVE32F-NEXT: .LBB48_16: # %cond.load10
-; RV64ZVE32F-NEXT: vmv.x.s t0, v9
+; RV64ZVE32F-NEXT: .LBB48_12: # %else8
+; RV64ZVE32F-NEXT: andi t0, a5, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz t0, .LBB48_16
+; RV64ZVE32F-NEXT: # %bb.13: # %cond.load10
+; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: j .LBB48_12
-; RV64ZVE32F-NEXT: .LBB48_17: # %cond.load13
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s t1, v8
-; RV64ZVE32F-NEXT: slli t1, t1, 3
-; RV64ZVE32F-NEXT: add t1, a1, t1
-; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: .LBB48_18: # %else14
+; RV64ZVE32F-NEXT: .LBB48_14: # %cond.load10
+; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: bnez t1, .LBB48_17
+; RV64ZVE32F-NEXT: # %bb.15:
+; RV64ZVE32F-NEXT: ld t1, 40(a2)
+; RV64ZVE32F-NEXT: j .LBB48_18
+; RV64ZVE32F-NEXT: .LBB48_16:
+; RV64ZVE32F-NEXT: ld t0, 32(a2)
+; RV64ZVE32F-NEXT: j .LBB48_14
+; RV64ZVE32F-NEXT: .LBB48_17: # %cond.load13
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s t1, v9
+; RV64ZVE32F-NEXT: slli t1, t1, 3
+; RV64ZVE32F-NEXT: add t1, a1, t1
+; RV64ZVE32F-NEXT: ld t1, 0(t1)
+; RV64ZVE32F-NEXT: .LBB48_18: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB48_22
; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
@@ -4741,58 +4425,60 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB49_6: # %else2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB49_14
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a6, .LBB49_10
; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a6, v8
+; RV64ZVE32F-NEXT: vmv.x.s a6, v9
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: .LBB49_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB49_15
+; RV64ZVE32F-NEXT: bnez a7, .LBB49_11
; RV64ZVE32F-NEXT: # %bb.9:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: j .LBB49_12
; RV64ZVE32F-NEXT: .LBB49_10:
-; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB49_16
-; RV64ZVE32F-NEXT: # %bb.11:
-; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: .LBB49_12:
-; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB49_17
-; RV64ZVE32F-NEXT: # %bb.13:
-; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB49_18
-; RV64ZVE32F-NEXT: .LBB49_14:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: j .LBB49_8
-; RV64ZVE32F-NEXT: .LBB49_15: # %cond.load7
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a7, v8
+; RV64ZVE32F-NEXT: .LBB49_11: # %cond.load7
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a7, v9
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: j .LBB49_10
-; RV64ZVE32F-NEXT: .LBB49_16: # %cond.load10
-; RV64ZVE32F-NEXT: vmv.x.s t0, v9
+; RV64ZVE32F-NEXT: .LBB49_12: # %else8
+; RV64ZVE32F-NEXT: andi t0, a5, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz t0, .LBB49_16
+; RV64ZVE32F-NEXT: # %bb.13: # %cond.load10
+; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: j .LBB49_12
+; RV64ZVE32F-NEXT: .LBB49_14: # %cond.load10
+; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: bnez t1, .LBB49_17
+; RV64ZVE32F-NEXT: # %bb.15:
+; RV64ZVE32F-NEXT: ld t1, 40(a2)
+; RV64ZVE32F-NEXT: j .LBB49_18
+; RV64ZVE32F-NEXT: .LBB49_16:
+; RV64ZVE32F-NEXT: ld t0, 32(a2)
+; RV64ZVE32F-NEXT: j .LBB49_14
; RV64ZVE32F-NEXT: .LBB49_17: # %cond.load13
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s t1, v8
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB49_18: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB49_22
; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
@@ -5022,62 +4708,64 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB50_6: # %else2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB50_14
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a6, .LBB50_10
; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a6, v8
-; RV64ZVE32F-NEXT: andi a6, a6, 255
+; RV64ZVE32F-NEXT: vmv.x.s a6, v9
+; RV64ZVE32F-NEXT: zext.b a6, a6
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: .LBB50_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB50_15
+; RV64ZVE32F-NEXT: bnez a7, .LBB50_11
; RV64ZVE32F-NEXT: # %bb.9:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: j .LBB50_12
; RV64ZVE32F-NEXT: .LBB50_10:
-; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB50_16
-; RV64ZVE32F-NEXT: # %bb.11:
-; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: .LBB50_12:
-; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB50_17
-; RV64ZVE32F-NEXT: # %bb.13:
-; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB50_18
-; RV64ZVE32F-NEXT: .LBB50_14:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: j .LBB50_8
-; RV64ZVE32F-NEXT: .LBB50_15: # %cond.load7
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a7, v8
-; RV64ZVE32F-NEXT: andi a7, a7, 255
+; RV64ZVE32F-NEXT: .LBB50_11: # %cond.load7
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a7, v9
+; RV64ZVE32F-NEXT: zext.b a7, a7
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: j .LBB50_10
-; RV64ZVE32F-NEXT: .LBB50_16: # %cond.load10
-; RV64ZVE32F-NEXT: vmv.x.s t0, v9
-; RV64ZVE32F-NEXT: andi t0, t0, 255
+; RV64ZVE32F-NEXT: .LBB50_12: # %else8
+; RV64ZVE32F-NEXT: andi t0, a5, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz t0, .LBB50_16
+; RV64ZVE32F-NEXT: # %bb.13: # %cond.load10
+; RV64ZVE32F-NEXT: vmv.x.s t0, v8
+; RV64ZVE32F-NEXT: zext.b t0, t0
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: j .LBB50_12
+; RV64ZVE32F-NEXT: .LBB50_14: # %cond.load10
+; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: bnez t1, .LBB50_17
+; RV64ZVE32F-NEXT: # %bb.15:
+; RV64ZVE32F-NEXT: ld t1, 40(a2)
+; RV64ZVE32F-NEXT: j .LBB50_18
+; RV64ZVE32F-NEXT: .LBB50_16:
+; RV64ZVE32F-NEXT: ld t0, 32(a2)
+; RV64ZVE32F-NEXT: j .LBB50_14
; RV64ZVE32F-NEXT: .LBB50_17: # %cond.load13
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s t1, v8
-; RV64ZVE32F-NEXT: andi t1, t1, 255
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s t1, v9
+; RV64ZVE32F-NEXT: zext.b t1, t1
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB50_18: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB50_22
; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
@@ -5308,58 +4996,60 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB51_6: # %else2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB51_14
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a6, .LBB51_10
; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a6, v8
+; RV64ZVE32F-NEXT: vmv.x.s a6, v9
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: .LBB51_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB51_15
+; RV64ZVE32F-NEXT: bnez a7, .LBB51_11
; RV64ZVE32F-NEXT: # %bb.9:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: j .LBB51_12
; RV64ZVE32F-NEXT: .LBB51_10:
-; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB51_16
-; RV64ZVE32F-NEXT: # %bb.11:
-; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: .LBB51_12:
-; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB51_17
-; RV64ZVE32F-NEXT: # %bb.13:
-; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB51_18
-; RV64ZVE32F-NEXT: .LBB51_14:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: j .LBB51_8
-; RV64ZVE32F-NEXT: .LBB51_15: # %cond.load7
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a7, v8
+; RV64ZVE32F-NEXT: .LBB51_11: # %cond.load7
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a7, v9
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: j .LBB51_10
-; RV64ZVE32F-NEXT: .LBB51_16: # %cond.load10
-; RV64ZVE32F-NEXT: vmv.x.s t0, v9
+; RV64ZVE32F-NEXT: .LBB51_12: # %else8
+; RV64ZVE32F-NEXT: andi t0, a5, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz t0, .LBB51_16
+; RV64ZVE32F-NEXT: # %bb.13: # %cond.load10
+; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: j .LBB51_12
+; RV64ZVE32F-NEXT: .LBB51_14: # %cond.load10
+; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: bnez t1, .LBB51_17
+; RV64ZVE32F-NEXT: # %bb.15:
+; RV64ZVE32F-NEXT: ld t1, 40(a2)
+; RV64ZVE32F-NEXT: j .LBB51_18
+; RV64ZVE32F-NEXT: .LBB51_16:
+; RV64ZVE32F-NEXT: ld t0, 32(a2)
+; RV64ZVE32F-NEXT: j .LBB51_14
; RV64ZVE32F-NEXT: .LBB51_17: # %cond.load13
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s t1, v8
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB51_18: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB51_22
; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
@@ -5587,58 +5277,60 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB52_6: # %else2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB52_14
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a6, .LBB52_10
; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a6, v8
+; RV64ZVE32F-NEXT: vmv.x.s a6, v9
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: .LBB52_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB52_15
+; RV64ZVE32F-NEXT: bnez a7, .LBB52_11
; RV64ZVE32F-NEXT: # %bb.9:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: j .LBB52_12
; RV64ZVE32F-NEXT: .LBB52_10:
-; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB52_16
-; RV64ZVE32F-NEXT: # %bb.11:
-; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: .LBB52_12:
-; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB52_17
-; RV64ZVE32F-NEXT: # %bb.13:
-; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB52_18
-; RV64ZVE32F-NEXT: .LBB52_14:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: j .LBB52_8
-; RV64ZVE32F-NEXT: .LBB52_15: # %cond.load7
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a7, v8
+; RV64ZVE32F-NEXT: .LBB52_11: # %cond.load7
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a7, v9
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: j .LBB52_10
-; RV64ZVE32F-NEXT: .LBB52_16: # %cond.load10
-; RV64ZVE32F-NEXT: vmv.x.s t0, v9
+; RV64ZVE32F-NEXT: .LBB52_12: # %else8
+; RV64ZVE32F-NEXT: andi t0, a5, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz t0, .LBB52_16
+; RV64ZVE32F-NEXT: # %bb.13: # %cond.load10
+; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: j .LBB52_12
+; RV64ZVE32F-NEXT: .LBB52_14: # %cond.load10
+; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: bnez t1, .LBB52_17
+; RV64ZVE32F-NEXT: # %bb.15:
+; RV64ZVE32F-NEXT: ld t1, 40(a2)
+; RV64ZVE32F-NEXT: j .LBB52_18
+; RV64ZVE32F-NEXT: .LBB52_16:
+; RV64ZVE32F-NEXT: ld t0, 32(a2)
+; RV64ZVE32F-NEXT: j .LBB52_14
; RV64ZVE32F-NEXT: .LBB52_17: # %cond.load13
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s t1, v8
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB52_18: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB52_22
; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
@@ -5870,62 +5562,64 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB53_6: # %else2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB53_14
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a6, .LBB53_10
; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a6, v8
+; RV64ZVE32F-NEXT: vmv.x.s a6, v9
; RV64ZVE32F-NEXT: slli a6, a6, 48
; RV64ZVE32F-NEXT: srli a6, a6, 45
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: .LBB53_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB53_15
+; RV64ZVE32F-NEXT: bnez a7, .LBB53_11
; RV64ZVE32F-NEXT: # %bb.9:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: j .LBB53_12
; RV64ZVE32F-NEXT: .LBB53_10:
-; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB53_16
-; RV64ZVE32F-NEXT: # %bb.11:
-; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: .LBB53_12:
-; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB53_17
-; RV64ZVE32F-NEXT: # %bb.13:
-; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB53_18
-; RV64ZVE32F-NEXT: .LBB53_14:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: j .LBB53_8
-; RV64ZVE32F-NEXT: .LBB53_15: # %cond.load7
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a7, v8
+; RV64ZVE32F-NEXT: .LBB53_11: # %cond.load7
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a7, v9
; RV64ZVE32F-NEXT: slli a7, a7, 48
; RV64ZVE32F-NEXT: srli a7, a7, 45
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: j .LBB53_10
-; RV64ZVE32F-NEXT: .LBB53_16: # %cond.load10
-; RV64ZVE32F-NEXT: vmv.x.s t0, v9
+; RV64ZVE32F-NEXT: .LBB53_12: # %else8
+; RV64ZVE32F-NEXT: andi t0, a5, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz t0, .LBB53_16
+; RV64ZVE32F-NEXT: # %bb.13: # %cond.load10
+; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 48
; RV64ZVE32F-NEXT: srli t0, t0, 45
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: j .LBB53_12
+; RV64ZVE32F-NEXT: .LBB53_14: # %cond.load10
+; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: bnez t1, .LBB53_17
+; RV64ZVE32F-NEXT: # %bb.15:
+; RV64ZVE32F-NEXT: ld t1, 40(a2)
+; RV64ZVE32F-NEXT: j .LBB53_18
+; RV64ZVE32F-NEXT: .LBB53_16:
+; RV64ZVE32F-NEXT: ld t0, 32(a2)
+; RV64ZVE32F-NEXT: j .LBB53_14
; RV64ZVE32F-NEXT: .LBB53_17: # %cond.load13
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s t1, v8
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: slli t1, t1, 48
; RV64ZVE32F-NEXT: srli t1, t1, 45
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB53_18: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB53_22
; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
@@ -6154,58 +5848,62 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB54_6: # %else2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB54_14
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a6, .LBB54_8
; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a6, v8
+; RV64ZVE32F-NEXT: vmv.x.s a6, v10
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
-; RV64ZVE32F-NEXT: .LBB54_8: # %cond.load4
+; RV64ZVE32F-NEXT: j .LBB54_9
+; RV64ZVE32F-NEXT: .LBB54_8:
+; RV64ZVE32F-NEXT: ld a6, 16(a2)
+; RV64ZVE32F-NEXT: .LBB54_9: # %else5
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB54_15
-; RV64ZVE32F-NEXT: # %bb.9:
-; RV64ZVE32F-NEXT: ld a7, 24(a2)
-; RV64ZVE32F-NEXT: .LBB54_10:
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz a7, .LBB54_15
+; RV64ZVE32F-NEXT: # %bb.10: # %cond.load7
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a7, v9
+; RV64ZVE32F-NEXT: slli a7, a7, 3
+; RV64ZVE32F-NEXT: add a7, a1, a7
+; RV64ZVE32F-NEXT: ld a7, 0(a7)
+; RV64ZVE32F-NEXT: .LBB54_11: # %cond.load7
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB54_16
-; RV64ZVE32F-NEXT: # %bb.11:
+; RV64ZVE32F-NEXT: # %bb.12:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: .LBB54_12:
+; RV64ZVE32F-NEXT: .LBB54_13:
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB54_17
-; RV64ZVE32F-NEXT: # %bb.13:
+; RV64ZVE32F-NEXT: # %bb.14:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB54_18
-; RV64ZVE32F-NEXT: .LBB54_14:
-; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: j .LBB54_8
-; RV64ZVE32F-NEXT: .LBB54_15: # %cond.load7
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a7, v8
-; RV64ZVE32F-NEXT: slli a7, a7, 3
-; RV64ZVE32F-NEXT: add a7, a1, a7
-; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: j .LBB54_10
+; RV64ZVE32F-NEXT: .LBB54_15:
+; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: j .LBB54_11
; RV64ZVE32F-NEXT: .LBB54_16: # %cond.load10
-; RV64ZVE32F-NEXT: vmv.x.s t0, v10
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: j .LBB54_12
+; RV64ZVE32F-NEXT: j .LBB54_13
; RV64ZVE32F-NEXT: .LBB54_17: # %cond.load13
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s t1, v8
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB54_18: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB54_22
; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
@@ -6431,58 +6129,62 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB55_6: # %else2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB55_14
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a6, .LBB55_8
; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a6, v8
+; RV64ZVE32F-NEXT: vmv.x.s a6, v10
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
-; RV64ZVE32F-NEXT: .LBB55_8: # %cond.load4
+; RV64ZVE32F-NEXT: j .LBB55_9
+; RV64ZVE32F-NEXT: .LBB55_8:
+; RV64ZVE32F-NEXT: ld a6, 16(a2)
+; RV64ZVE32F-NEXT: .LBB55_9: # %else5
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB55_15
-; RV64ZVE32F-NEXT: # %bb.9:
-; RV64ZVE32F-NEXT: ld a7, 24(a2)
-; RV64ZVE32F-NEXT: .LBB55_10:
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz a7, .LBB55_15
+; RV64ZVE32F-NEXT: # %bb.10: # %cond.load7
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a7, v9
+; RV64ZVE32F-NEXT: slli a7, a7, 3
+; RV64ZVE32F-NEXT: add a7, a1, a7
+; RV64ZVE32F-NEXT: ld a7, 0(a7)
+; RV64ZVE32F-NEXT: .LBB55_11: # %cond.load7
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB55_16
-; RV64ZVE32F-NEXT: # %bb.11:
+; RV64ZVE32F-NEXT: # %bb.12:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: .LBB55_12:
+; RV64ZVE32F-NEXT: .LBB55_13:
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB55_17
-; RV64ZVE32F-NEXT: # %bb.13:
+; RV64ZVE32F-NEXT: # %bb.14:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB55_18
-; RV64ZVE32F-NEXT: .LBB55_14:
-; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: j .LBB55_8
-; RV64ZVE32F-NEXT: .LBB55_15: # %cond.load7
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a7, v8
-; RV64ZVE32F-NEXT: slli a7, a7, 3
-; RV64ZVE32F-NEXT: add a7, a1, a7
-; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: j .LBB55_10
+; RV64ZVE32F-NEXT: .LBB55_15:
+; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: j .LBB55_11
; RV64ZVE32F-NEXT: .LBB55_16: # %cond.load10
-; RV64ZVE32F-NEXT: vmv.x.s t0, v10
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: j .LBB55_12
+; RV64ZVE32F-NEXT: j .LBB55_13
; RV64ZVE32F-NEXT: .LBB55_17: # %cond.load13
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s t1, v8
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB55_18: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB55_22
; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
@@ -6711,62 +6413,66 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB56_6: # %else2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB56_14
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a6, .LBB56_8
; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a6, v8
+; RV64ZVE32F-NEXT: vmv.x.s a6, v10
; RV64ZVE32F-NEXT: slli a6, a6, 32
; RV64ZVE32F-NEXT: srli a6, a6, 29
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
-; RV64ZVE32F-NEXT: .LBB56_8: # %cond.load4
+; RV64ZVE32F-NEXT: j .LBB56_9
+; RV64ZVE32F-NEXT: .LBB56_8:
+; RV64ZVE32F-NEXT: ld a6, 16(a2)
+; RV64ZVE32F-NEXT: .LBB56_9: # %else5
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB56_15
-; RV64ZVE32F-NEXT: # %bb.9:
-; RV64ZVE32F-NEXT: ld a7, 24(a2)
-; RV64ZVE32F-NEXT: .LBB56_10:
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz a7, .LBB56_15
+; RV64ZVE32F-NEXT: # %bb.10: # %cond.load7
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a7, v9
+; RV64ZVE32F-NEXT: slli a7, a7, 32
+; RV64ZVE32F-NEXT: srli a7, a7, 29
+; RV64ZVE32F-NEXT: add a7, a1, a7
+; RV64ZVE32F-NEXT: ld a7, 0(a7)
+; RV64ZVE32F-NEXT: .LBB56_11: # %cond.load7
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB56_16
-; RV64ZVE32F-NEXT: # %bb.11:
+; RV64ZVE32F-NEXT: # %bb.12:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: .LBB56_12:
+; RV64ZVE32F-NEXT: .LBB56_13:
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB56_17
-; RV64ZVE32F-NEXT: # %bb.13:
+; RV64ZVE32F-NEXT: # %bb.14:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB56_18
-; RV64ZVE32F-NEXT: .LBB56_14:
-; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: j .LBB56_8
-; RV64ZVE32F-NEXT: .LBB56_15: # %cond.load7
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a7, v8
-; RV64ZVE32F-NEXT: slli a7, a7, 32
-; RV64ZVE32F-NEXT: srli a7, a7, 29
-; RV64ZVE32F-NEXT: add a7, a1, a7
-; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: j .LBB56_10
+; RV64ZVE32F-NEXT: .LBB56_15:
+; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: j .LBB56_11
; RV64ZVE32F-NEXT: .LBB56_16: # %cond.load10
-; RV64ZVE32F-NEXT: vmv.x.s t0, v10
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 32
; RV64ZVE32F-NEXT: srli t0, t0, 29
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: j .LBB56_12
+; RV64ZVE32F-NEXT: j .LBB56_13
; RV64ZVE32F-NEXT: .LBB56_17: # %cond.load13
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s t1, v8
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: slli t1, t1, 32
; RV64ZVE32F-NEXT: srli t1, t1, 29
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB56_18: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB56_22
; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
@@ -7454,21 +7160,10 @@ define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x
; RV64ZVE32F-NEXT: .LBB64_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB64_14
-; RV64ZVE32F-NEXT: .LBB64_5: # %else5
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB64_15
-; RV64ZVE32F-NEXT: .LBB64_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB64_16
-; RV64ZVE32F-NEXT: .LBB64_7: # %else11
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB64_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB64_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
@@ -7550,39 +7245,6 @@ define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB64_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v11, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-NEXT: j .LBB64_5
-; RV64ZVE32F-NEXT: .LBB64_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-NEXT: j .LBB64_6
-; RV64ZVE32F-NEXT: .LBB64_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-NEXT: j .LBB64_7
%ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i8> %idxs
%v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
ret <8 x bfloat> %v
@@ -7639,21 +7301,10 @@ define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: .LBB65_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB65_14
-; RV64ZVE32F-NEXT: .LBB65_5: # %else5
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB65_15
-; RV64ZVE32F-NEXT: .LBB65_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB65_16
-; RV64ZVE32F-NEXT: .LBB65_7: # %else11
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB65_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB65_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
@@ -7735,39 +7386,6 @@ define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB65_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v11, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-NEXT: j .LBB65_5
-; RV64ZVE32F-NEXT: .LBB65_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-NEXT: j .LBB65_6
-; RV64ZVE32F-NEXT: .LBB65_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-NEXT: j .LBB65_7
%eidxs = sext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
%v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
@@ -7825,37 +7443,71 @@ define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: .LBB66_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB66_14
-; RV64ZVE32F-NEXT: .LBB66_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB66_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v11, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
+; RV64ZVE32F-NEXT: .LBB66_6: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB66_15
-; RV64ZVE32F-NEXT: .LBB66_6: # %else8
+; RV64ZVE32F-NEXT: beqz a2, .LBB66_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3
+; RV64ZVE32F-NEXT: .LBB66_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB66_16
-; RV64ZVE32F-NEXT: .LBB66_7: # %else11
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB66_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.load10
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 4
+; RV64ZVE32F-NEXT: .LBB66_10: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB66_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB66_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: andi a2, a2, 255
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: zext.b a2, a2
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
-; RV64ZVE32F-NEXT: .LBB66_9: # %else14
+; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 5
+; RV64ZVE32F-NEXT: .LBB66_12: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB66_11
-; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB66_14
+; RV64ZVE32F-NEXT: # %bb.13: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: andi a2, a2, 255
+; RV64ZVE32F-NEXT: zext.b a2, a2
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
@@ -7863,14 +7515,14 @@ define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
-; RV64ZVE32F-NEXT: .LBB66_11: # %else17
+; RV64ZVE32F-NEXT: .LBB66_14: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB66_13
-; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
+; RV64ZVE32F-NEXT: beqz a1, .LBB66_16
+; RV64ZVE32F-NEXT: # %bb.15: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
-; RV64ZVE32F-NEXT: andi a1, a1, 255
+; RV64ZVE32F-NEXT: zext.b a1, a1
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lh a0, 0(a0)
@@ -7878,46 +7530,10 @@ define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
-; RV64ZVE32F-NEXT: .LBB66_13: # %else20
+; RV64ZVE32F-NEXT: .LBB66_16: # %else20
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB66_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: andi a2, a2, 255
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v11, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-NEXT: j .LBB66_5
-; RV64ZVE32F-NEXT: .LBB66_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: zext.b a2, a2
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v10, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-NEXT: j .LBB66_6
-; RV64ZVE32F-NEXT: .LBB66_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: andi a2, a2, 255
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v10, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-NEXT: j .LBB66_7
%eidxs = zext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
%v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
@@ -7972,18 +7588,20 @@ define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: .LBB67_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB67_14
-; RV64ZVE32F-NEXT: .LBB67_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB67_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v11, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
+; RV64ZVE32F-NEXT: .LBB67_6: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB67_15
-; RV64ZVE32F-NEXT: .LBB67_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB67_16
-; RV64ZVE32F-NEXT: .LBB67_7: # %else11
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB67_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB67_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -8049,35 +7667,6 @@ define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB67_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v11, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-NEXT: j .LBB67_5
-; RV64ZVE32F-NEXT: .LBB67_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-NEXT: j .LBB67_6
-; RV64ZVE32F-NEXT: .LBB67_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-NEXT: j .LBB67_7
%ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %idxs
%v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
ret <8 x bfloat> %v
@@ -8623,21 +8212,10 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-ZVFH-NEXT: .LBB74_4: # %else2
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_14
-; RV64ZVE32F-ZVFH-NEXT: .LBB74_5: # %else5
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_15
-; RV64ZVE32F-ZVFH-NEXT: .LBB74_6: # %else8
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_16
-; RV64ZVE32F-ZVFH-NEXT: .LBB74_7: # %else11
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_9
-; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %cond.load13
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_6
+; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
@@ -8719,39 +8297,6 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-ZVFH-NEXT: ret
-; RV64ZVE32F-ZVFH-NEXT: .LBB74_14: # %cond.load4
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-ZVFH-NEXT: j .LBB74_5
-; RV64ZVE32F-ZVFH-NEXT: .LBB74_15: # %cond.load7
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-ZVFH-NEXT: j .LBB74_6
-; RV64ZVE32F-ZVFH-NEXT: .LBB74_16: # %cond.load10
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-ZVFH-NEXT: j .LBB74_7
;
; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8i8_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
@@ -8783,21 +8328,10 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_4: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_14
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_5: # %else5
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_15
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_6: # %else8
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_16
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_7: # %else11
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_9
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %cond.load13
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_6
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
@@ -8879,39 +8413,6 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-ZVFHMIN-NEXT: ret
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_14: # %cond.load4
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB74_5
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_15: # %cond.load7
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB74_6
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_16: # %cond.load10
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB74_7
%ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
%v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
ret <8 x half> %v
@@ -8968,21 +8469,10 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFH-NEXT: .LBB75_4: # %else2
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_14
-; RV64ZVE32F-ZVFH-NEXT: .LBB75_5: # %else5
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_15
-; RV64ZVE32F-ZVFH-NEXT: .LBB75_6: # %else8
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_16
-; RV64ZVE32F-ZVFH-NEXT: .LBB75_7: # %else11
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_9
-; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %cond.load13
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_6
+; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
@@ -9064,39 +8554,6 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-ZVFH-NEXT: ret
-; RV64ZVE32F-ZVFH-NEXT: .LBB75_14: # %cond.load4
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-ZVFH-NEXT: j .LBB75_5
-; RV64ZVE32F-ZVFH-NEXT: .LBB75_15: # %cond.load7
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-ZVFH-NEXT: j .LBB75_6
-; RV64ZVE32F-ZVFH-NEXT: .LBB75_16: # %cond.load10
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-ZVFH-NEXT: j .LBB75_7
;
; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_sext_v8i8_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
@@ -9128,21 +8585,10 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_4: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_14
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_5: # %else5
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_15
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_6: # %else8
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_16
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_7: # %else11
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_9
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %cond.load13
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_6
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
@@ -9224,39 +8670,6 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-ZVFHMIN-NEXT: ret
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_14: # %cond.load4
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB75_5
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_15: # %cond.load7
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB75_6
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_16: # %cond.load10
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB75_7
%eidxs = sext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
%v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
@@ -9314,37 +8727,71 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFH-NEXT: .LBB76_4: # %else2
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_14
-; RV64ZVE32F-ZVFH-NEXT: .LBB76_5: # %else5
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_6
+; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: zext.b a2, a2
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2
+; RV64ZVE32F-ZVFH-NEXT: .LBB76_6: # %else5
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_15
-; RV64ZVE32F-ZVFH-NEXT: .LBB76_6: # %else8
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_8
+; RV64ZVE32F-ZVFH-NEXT: # %bb.7: # %cond.load7
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: zext.b a2, a2
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 3
+; RV64ZVE32F-ZVFH-NEXT: .LBB76_8: # %else8
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_16
-; RV64ZVE32F-ZVFH-NEXT: .LBB76_7: # %else11
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_10
+; RV64ZVE32F-ZVFH-NEXT: # %bb.9: # %cond.load10
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: zext.b a2, a2
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 4
+; RV64ZVE32F-ZVFH-NEXT: .LBB76_10: # %else11
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_9
-; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_12
+; RV64ZVE32F-ZVFH-NEXT: # %bb.11: # %cond.load13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: zext.b a2, a2
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5
-; RV64ZVE32F-ZVFH-NEXT: .LBB76_9: # %else14
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 5
+; RV64ZVE32F-ZVFH-NEXT: .LBB76_12: # %else14
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_11
-; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %cond.load16
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_14
+; RV64ZVE32F-ZVFH-NEXT: # %bb.13: # %cond.load16
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFH-NEXT: zext.b a2, a2
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
@@ -9352,14 +8799,14 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6
-; RV64ZVE32F-ZVFH-NEXT: .LBB76_11: # %else17
+; RV64ZVE32F-ZVFH-NEXT: .LBB76_14: # %else17
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB76_13
-; RV64ZVE32F-ZVFH-NEXT: # %bb.12: # %cond.load19
+; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB76_16
+; RV64ZVE32F-ZVFH-NEXT: # %bb.15: # %cond.load19
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8
-; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 255
+; RV64ZVE32F-ZVFH-NEXT: zext.b a1, a1
; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
@@ -9367,46 +8814,10 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7
-; RV64ZVE32F-ZVFH-NEXT: .LBB76_13: # %else20
+; RV64ZVE32F-ZVFH-NEXT: .LBB76_16: # %else20
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-ZVFH-NEXT: ret
-; RV64ZVE32F-ZVFH-NEXT: .LBB76_14: # %cond.load4
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
-; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-ZVFH-NEXT: j .LBB76_5
-; RV64ZVE32F-ZVFH-NEXT: .LBB76_15: # %cond.load7
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v10, 1
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-ZVFH-NEXT: zext.b a2, a2
-; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-ZVFH-NEXT: j .LBB76_6
-; RV64ZVE32F-ZVFH-NEXT: .LBB76_16: # %cond.load10
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
-; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-ZVFH-NEXT: j .LBB76_7
;
; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_zext_v8i8_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
@@ -9440,37 +8851,71 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_4: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_14
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_5: # %else5
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_6
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: zext.b a2, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_6: # %else5
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_15
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_6: # %else8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_8
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.7: # %cond.load7
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: zext.b a2, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_8: # %else8
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_16
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_7: # %else11
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_10
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.9: # %cond.load10
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: zext.b a2, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_10: # %else11
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_9
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_12
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.11: # %cond.load13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: zext.b a2, a2
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_9: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 5
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_12: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_11
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %cond.load16
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_14
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.13: # %cond.load16
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: zext.b a2, a2
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
@@ -9478,14 +8923,14 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_11: # %else17
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_14: # %else17
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB76_13
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.12: # %cond.load19
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB76_16
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.15: # %cond.load19
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: zext.b a1, a1
; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
@@ -9493,46 +8938,10 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_13: # %else20
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_16: # %else20
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-ZVFHMIN-NEXT: ret
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_14: # %cond.load4
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
-; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB76_5
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_15: # %cond.load7
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-ZVFHMIN-NEXT: zext.b a2, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB76_6
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_16: # %cond.load10
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
-; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB76_7
%eidxs = zext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
%v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
@@ -9587,18 +8996,20 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-ZVFH-NEXT: .LBB77_4: # %else2
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_14
-; RV64ZVE32F-ZVFH-NEXT: .LBB77_5: # %else5
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_6
+; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2
+; RV64ZVE32F-ZVFH-NEXT: .LBB77_6: # %else5
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_15
-; RV64ZVE32F-ZVFH-NEXT: .LBB77_6: # %else8
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_16
-; RV64ZVE32F-ZVFH-NEXT: .LBB77_7: # %else11
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_9
-; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_8
+; RV64ZVE32F-ZVFH-NEXT: # %bb.7: # %cond.load7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
@@ -9664,35 +9075,6 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-ZVFH-NEXT: ret
-; RV64ZVE32F-ZVFH-NEXT: .LBB77_14: # %cond.load4
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-ZVFH-NEXT: j .LBB77_5
-; RV64ZVE32F-ZVFH-NEXT: .LBB77_15: # %cond.load7
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-ZVFH-NEXT: j .LBB77_6
-; RV64ZVE32F-ZVFH-NEXT: .LBB77_16: # %cond.load10
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-ZVFH-NEXT: j .LBB77_7
;
; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
@@ -9723,18 +9105,20 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_4: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_14
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_5: # %else5
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_6
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_6: # %else5
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_15
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_6: # %else8
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_16
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_7: # %else11
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_9
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_8
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.7: # %cond.load7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
@@ -9800,35 +9184,6 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-ZVFHMIN-NEXT: ret
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_14: # %cond.load4
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB77_5
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_15: # %cond.load7
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB77_6
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_16: # %cond.load10
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB77_7
%ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
%v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
ret <8 x half> %v
@@ -10199,18 +9554,21 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: .LBB84_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB84_14
-; RV64ZVE32F-NEXT: .LBB84_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB84_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: flw fa5, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
+; RV64ZVE32F-NEXT: .LBB84_6: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB84_15
-; RV64ZVE32F-NEXT: .LBB84_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB84_16
-; RV64ZVE32F-NEXT: .LBB84_7: # %else11
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB84_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB84_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -10281,39 +9639,6 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB84_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: j .LBB84_5
-; RV64ZVE32F-NEXT: .LBB84_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: j .LBB84_6
-; RV64ZVE32F-NEXT: .LBB84_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: j .LBB84_7
%ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
@@ -10369,18 +9694,21 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: .LBB85_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB85_14
-; RV64ZVE32F-NEXT: .LBB85_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB85_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: flw fa5, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
+; RV64ZVE32F-NEXT: .LBB85_6: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB85_15
-; RV64ZVE32F-NEXT: .LBB85_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB85_16
-; RV64ZVE32F-NEXT: .LBB85_7: # %else11
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB85_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB85_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -10451,39 +9779,6 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB85_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: j .LBB85_5
-; RV64ZVE32F-NEXT: .LBB85_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: j .LBB85_6
-; RV64ZVE32F-NEXT: .LBB85_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: j .LBB85_7
%eidxs = sext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
@@ -10543,22 +9838,56 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: .LBB86_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB86_14
-; RV64ZVE32F-NEXT: .LBB86_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB86_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: flw fa5, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
+; RV64ZVE32F-NEXT: .LBB86_6: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB86_15
-; RV64ZVE32F-NEXT: .LBB86_6: # %else8
+; RV64ZVE32F-NEXT: beqz a2, .LBB86_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: flw fa5, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 3
+; RV64ZVE32F-NEXT: .LBB86_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB86_16
-; RV64ZVE32F-NEXT: .LBB86_7: # %else11
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB86_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.load10
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: flw fa5, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
+; RV64ZVE32F-NEXT: .LBB86_10: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB86_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB86_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: andi a2, a2, 255
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: zext.b a2, a2
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
@@ -10566,14 +9895,14 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
-; RV64ZVE32F-NEXT: .LBB86_9: # %else14
+; RV64ZVE32F-NEXT: .LBB86_12: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB86_11
-; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB86_14
+; RV64ZVE32F-NEXT: # %bb.13: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: andi a2, a2, 255
+; RV64ZVE32F-NEXT: zext.b a2, a2
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
@@ -10581,14 +9910,14 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
-; RV64ZVE32F-NEXT: .LBB86_11: # %else17
+; RV64ZVE32F-NEXT: .LBB86_14: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB86_13
-; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
+; RV64ZVE32F-NEXT: beqz a1, .LBB86_16
+; RV64ZVE32F-NEXT: # %bb.15: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
-; RV64ZVE32F-NEXT: andi a1, a1, 255
+; RV64ZVE32F-NEXT: zext.b a1, a1
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
@@ -10596,46 +9925,10 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
-; RV64ZVE32F-NEXT: .LBB86_13: # %else20
+; RV64ZVE32F-NEXT: .LBB86_16: # %else20
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB86_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: andi a2, a2, 255
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: j .LBB86_5
-; RV64ZVE32F-NEXT: .LBB86_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: zext.b a2, a2
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: j .LBB86_6
-; RV64ZVE32F-NEXT: .LBB86_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: andi a2, a2, 255
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: j .LBB86_7
%eidxs = zext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
@@ -10694,18 +9987,21 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: .LBB87_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB87_14
-; RV64ZVE32F-NEXT: .LBB87_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB87_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: flw fa5, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
+; RV64ZVE32F-NEXT: .LBB87_6: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB87_15
-; RV64ZVE32F-NEXT: .LBB87_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB87_16
-; RV64ZVE32F-NEXT: .LBB87_7: # %else11
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB87_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB87_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -10776,39 +10072,6 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB87_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: j .LBB87_5
-; RV64ZVE32F-NEXT: .LBB87_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: j .LBB87_6
-; RV64ZVE32F-NEXT: .LBB87_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: j .LBB87_7
%ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
@@ -10866,18 +10129,21 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: .LBB88_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB88_14
-; RV64ZVE32F-NEXT: .LBB88_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB88_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: flw fa5, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
+; RV64ZVE32F-NEXT: .LBB88_6: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB88_15
-; RV64ZVE32F-NEXT: .LBB88_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB88_16
-; RV64ZVE32F-NEXT: .LBB88_7: # %else11
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB88_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB88_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -10948,39 +10214,6 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB88_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: j .LBB88_5
-; RV64ZVE32F-NEXT: .LBB88_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: j .LBB88_6
-; RV64ZVE32F-NEXT: .LBB88_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: j .LBB88_7
%eidxs = sext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
@@ -11041,18 +10274,22 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: .LBB89_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB89_14
-; RV64ZVE32F-NEXT: .LBB89_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB89_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: flw fa5, 0(a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
+; RV64ZVE32F-NEXT: .LBB89_6: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB89_15
-; RV64ZVE32F-NEXT: .LBB89_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB89_16
-; RV64ZVE32F-NEXT: .LBB89_7: # %else11
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB89_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB89_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -11128,42 +10365,6 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB89_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 48
-; RV64ZVE32F-NEXT: srli a2, a2, 46
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: j .LBB89_5
-; RV64ZVE32F-NEXT: .LBB89_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 48
-; RV64ZVE32F-NEXT: srli a2, a2, 46
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: j .LBB89_6
-; RV64ZVE32F-NEXT: .LBB89_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: slli a2, a2, 48
-; RV64ZVE32F-NEXT: srli a2, a2, 46
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: j .LBB89_7
%eidxs = zext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
@@ -11216,21 +10417,10 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: .LBB90_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB90_14
-; RV64ZVE32F-NEXT: .LBB90_5: # %else5
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB90_15
-; RV64ZVE32F-NEXT: .LBB90_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB90_16
-; RV64ZVE32F-NEXT: .LBB90_7: # %else11
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB90_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB90_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
@@ -11242,13 +10432,13 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB90_15
-; RV64ZVE32F-NEXT: # %bb.7: # %else8
+; RV64ZVE32F-NEXT: .LBB90_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB90_16
; RV64ZVE32F-NEXT: .LBB90_8: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB90_10
-; RV64ZVE32F-NEXT: .LBB90_9: # %cond.load13
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -11296,28 +10486,17 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2
-; RV64ZVE32F-NEXT: j .LBB90_5
-; RV64ZVE32F-NEXT: .LBB90_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 3
+; RV64ZVE32F-NEXT: j .LBB90_7
+; RV64ZVE32F-NEXT: .LBB90_16: # %cond.load10
+; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: j .LBB90_6
-; RV64ZVE32F-NEXT: .LBB90_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v12
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
-; RV64ZVE32F-NEXT: j .LBB90_7
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
+; RV64ZVE32F-NEXT: j .LBB90_8
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
@@ -11917,9 +11096,14 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x
; RV64ZVE32F-NEXT: .LBB97_4: # %else2
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB97_14
-; RV64ZVE32F-NEXT: .LBB97_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a3, .LBB97_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: slli a3, a3, 3
+; RV64ZVE32F-NEXT: add a3, a1, a3
+; RV64ZVE32F-NEXT: fld fa2, 0(a3)
+; RV64ZVE32F-NEXT: .LBB97_6: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB97_8
; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
@@ -11930,12 +11114,10 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: .LBB97_8: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB97_16
-; RV64ZVE32F-NEXT: .LBB97_7: # %else11
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB97_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz a3, .LBB97_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
@@ -11979,25 +11161,6 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB97_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB97_5
-; RV64ZVE32F-NEXT: .LBB97_15: # %cond.load7
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB97_6
-; RV64ZVE32F-NEXT: .LBB97_16: # %cond.load10
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB97_7
%ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
@@ -12135,9 +11298,14 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: .LBB98_4: # %else2
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB98_14
-; RV64ZVE32F-NEXT: .LBB98_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a3, .LBB98_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: slli a3, a3, 3
+; RV64ZVE32F-NEXT: add a3, a1, a3
+; RV64ZVE32F-NEXT: fld fa2, 0(a3)
+; RV64ZVE32F-NEXT: .LBB98_6: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB98_8
; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
@@ -12148,12 +11316,10 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: .LBB98_8: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB98_16
-; RV64ZVE32F-NEXT: .LBB98_7: # %else11
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB98_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz a3, .LBB98_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
@@ -12197,25 +11363,6 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB98_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB98_5
-; RV64ZVE32F-NEXT: .LBB98_15: # %cond.load7
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB98_6
-; RV64ZVE32F-NEXT: .LBB98_16: # %cond.load10
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB98_7
%eidxs = sext <8 x i8> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -12357,9 +11504,15 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: .LBB99_4: # %else2
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB99_14
-; RV64ZVE32F-NEXT: .LBB99_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a3, .LBB99_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: zext.b a3, a3
+; RV64ZVE32F-NEXT: slli a3, a3, 3
+; RV64ZVE32F-NEXT: add a3, a1, a3
+; RV64ZVE32F-NEXT: fld fa2, 0(a3)
+; RV64ZVE32F-NEXT: .LBB99_6: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB99_8
; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
@@ -12371,12 +11524,10 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: .LBB99_8: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB99_16
-; RV64ZVE32F-NEXT: .LBB99_7: # %else11
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB99_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz a3, .LBB99_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: zext.b a3, a3
; RV64ZVE32F-NEXT: slli a3, a3, 3
@@ -12424,28 +11575,6 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB99_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: andi a3, a3, 255
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB99_5
-; RV64ZVE32F-NEXT: .LBB99_15: # %cond.load7
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: andi a3, a3, 255
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB99_6
-; RV64ZVE32F-NEXT: .LBB99_16: # %cond.load10
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
-; RV64ZVE32F-NEXT: andi a3, a3, 255
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB99_7
%eidxs = zext <8 x i8> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -12586,9 +11715,14 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8
; RV64ZVE32F-NEXT: .LBB100_4: # %else2
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB100_14
-; RV64ZVE32F-NEXT: .LBB100_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a3, .LBB100_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: slli a3, a3, 3
+; RV64ZVE32F-NEXT: add a3, a1, a3
+; RV64ZVE32F-NEXT: fld fa2, 0(a3)
+; RV64ZVE32F-NEXT: .LBB100_6: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB100_8
; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
@@ -12599,12 +11733,10 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: .LBB100_8: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB100_16
-; RV64ZVE32F-NEXT: .LBB100_7: # %else11
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB100_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz a3, .LBB100_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
@@ -12648,25 +11780,6 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB100_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB100_5
-; RV64ZVE32F-NEXT: .LBB100_15: # %cond.load7
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB100_6
-; RV64ZVE32F-NEXT: .LBB100_16: # %cond.load10
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB100_7
%ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
@@ -12806,9 +11919,14 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: .LBB101_4: # %else2
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB101_14
-; RV64ZVE32F-NEXT: .LBB101_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a3, .LBB101_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: slli a3, a3, 3
+; RV64ZVE32F-NEXT: add a3, a1, a3
+; RV64ZVE32F-NEXT: fld fa2, 0(a3)
+; RV64ZVE32F-NEXT: .LBB101_6: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB101_8
; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
@@ -12819,12 +11937,10 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: .LBB101_8: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB101_16
-; RV64ZVE32F-NEXT: .LBB101_7: # %else11
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB101_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz a3, .LBB101_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
@@ -12868,25 +11984,6 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB101_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB101_5
-; RV64ZVE32F-NEXT: .LBB101_15: # %cond.load7
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB101_6
-; RV64ZVE32F-NEXT: .LBB101_16: # %cond.load10
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB101_7
%eidxs = sext <8 x i16> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -13030,9 +12127,15 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: .LBB102_4: # %else2
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB102_14
-; RV64ZVE32F-NEXT: .LBB102_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a3, .LBB102_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: slli a3, a3, 48
+; RV64ZVE32F-NEXT: srli a3, a3, 45
+; RV64ZVE32F-NEXT: add a3, a1, a3
+; RV64ZVE32F-NEXT: fld fa2, 0(a3)
+; RV64ZVE32F-NEXT: .LBB102_6: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB102_8
; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
@@ -13044,12 +12147,10 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: .LBB102_8: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB102_16
-; RV64ZVE32F-NEXT: .LBB102_7: # %else11
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB102_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz a3, .LBB102_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 48
; RV64ZVE32F-NEXT: srli a3, a3, 45
@@ -13097,28 +12198,6 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB102_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: slli a3, a3, 48
-; RV64ZVE32F-NEXT: srli a3, a3, 45
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB102_5
-; RV64ZVE32F-NEXT: .LBB102_15: # %cond.load7
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: slli a3, a3, 48
-; RV64ZVE32F-NEXT: srli a3, a3, 45
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB102_6
-; RV64ZVE32F-NEXT: .LBB102_16: # %cond.load10
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
-; RV64ZVE32F-NEXT: slli a3, a3, 48
-; RV64ZVE32F-NEXT: srli a3, a3, 45
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB102_7
%eidxs = zext <8 x i16> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -13257,22 +12336,28 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8
; RV64ZVE32F-NEXT: .LBB103_4: # %else2
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB103_14
-; RV64ZVE32F-NEXT: .LBB103_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a3, .LBB103_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: slli a3, a3, 3
+; RV64ZVE32F-NEXT: add a3, a1, a3
+; RV64ZVE32F-NEXT: fld fa2, 0(a3)
+; RV64ZVE32F-NEXT: .LBB103_6: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a3, .LBB103_15
-; RV64ZVE32F-NEXT: # %bb.7: # %else8
+; RV64ZVE32F-NEXT: .LBB103_7: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB103_16
; RV64ZVE32F-NEXT: .LBB103_8: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB103_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
+; RV64ZVE32F-NEXT: beqz a3, .LBB103_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
@@ -13305,12 +12390,6 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB103_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB103_5
; RV64ZVE32F-NEXT: .LBB103_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
@@ -13318,14 +12397,14 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB103_6
+; RV64ZVE32F-NEXT: j .LBB103_7
; RV64ZVE32F-NEXT: .LBB103_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB103_7
+; RV64ZVE32F-NEXT: j .LBB103_8
%ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
@@ -13463,22 +12542,28 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: .LBB104_4: # %else2
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB104_14
-; RV64ZVE32F-NEXT: .LBB104_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a3, .LBB104_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: slli a3, a3, 3
+; RV64ZVE32F-NEXT: add a3, a1, a3
+; RV64ZVE32F-NEXT: fld fa2, 0(a3)
+; RV64ZVE32F-NEXT: .LBB104_6: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a3, .LBB104_15
-; RV64ZVE32F-NEXT: # %bb.7: # %else8
+; RV64ZVE32F-NEXT: .LBB104_7: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB104_16
; RV64ZVE32F-NEXT: .LBB104_8: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB104_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
+; RV64ZVE32F-NEXT: beqz a3, .LBB104_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
@@ -13511,12 +12596,6 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB104_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB104_5
; RV64ZVE32F-NEXT: .LBB104_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
@@ -13524,14 +12603,14 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB104_6
+; RV64ZVE32F-NEXT: j .LBB104_7
; RV64ZVE32F-NEXT: .LBB104_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB104_7
+; RV64ZVE32F-NEXT: j .LBB104_8
%eidxs = sext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -13672,22 +12751,29 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: .LBB105_4: # %else2
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB105_14
-; RV64ZVE32F-NEXT: .LBB105_5: # %else5
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a3, .LBB105_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: slli a3, a3, 32
+; RV64ZVE32F-NEXT: srli a3, a3, 29
+; RV64ZVE32F-NEXT: add a3, a1, a3
+; RV64ZVE32F-NEXT: fld fa2, 0(a3)
+; RV64ZVE32F-NEXT: .LBB105_6: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a3, .LBB105_15
-; RV64ZVE32F-NEXT: # %bb.7: # %else8
+; RV64ZVE32F-NEXT: .LBB105_7: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB105_16
; RV64ZVE32F-NEXT: .LBB105_8: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB105_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load13
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
+; RV64ZVE32F-NEXT: beqz a3, .LBB105_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
@@ -13723,13 +12809,6 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB105_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: slli a3, a3, 32
-; RV64ZVE32F-NEXT: srli a3, a3, 29
-; RV64ZVE32F-NEXT: add a3, a1, a3
-; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB105_5
; RV64ZVE32F-NEXT: .LBB105_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
@@ -13738,7 +12817,7 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB105_6
+; RV64ZVE32F-NEXT: j .LBB105_7
; RV64ZVE32F-NEXT: .LBB105_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
@@ -13746,7 +12825,7 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB105_7
+; RV64ZVE32F-NEXT: j .LBB105_8
%eidxs = zext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -14012,16 +13091,9 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: .LBB107_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB107_25
-; RV64ZVE32F-NEXT: .LBB107_5: # %else5
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB107_26
-; RV64ZVE32F-NEXT: .LBB107_6: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB107_8
-; RV64ZVE32F-NEXT: # %bb.7: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
@@ -14068,17 +13140,42 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB107_27
-; RV64ZVE32F-NEXT: .LBB107_11: # %else17
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_14
+; RV64ZVE32F-NEXT: # %bb.13: # %cond.load16
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v11, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 6
+; RV64ZVE32F-NEXT: .LBB107_14: # %else17
; RV64ZVE32F-NEXT: andi a2, a1, 128
-; RV64ZVE32F-NEXT: bnez a2, .LBB107_28
-; RV64ZVE32F-NEXT: .LBB107_12: # %else20
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_16
+; RV64ZVE32F-NEXT: # %bb.15: # %cond.load19
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 7
+; RV64ZVE32F-NEXT: .LBB107_16: # %else20
; RV64ZVE32F-NEXT: andi a2, a1, 256
-; RV64ZVE32F-NEXT: bnez a2, .LBB107_29
-; RV64ZVE32F-NEXT: .LBB107_13: # %else23
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_18
+; RV64ZVE32F-NEXT: # %bb.17: # %cond.load22
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 8
+; RV64ZVE32F-NEXT: .LBB107_18: # %else23
; RV64ZVE32F-NEXT: andi a2, a1, 512
-; RV64ZVE32F-NEXT: beqz a2, .LBB107_15
-; RV64ZVE32F-NEXT: # %bb.14: # %cond.load25
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_20
+; RV64ZVE32F-NEXT: # %bb.19: # %cond.load25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -14087,48 +13184,71 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 9
-; RV64ZVE32F-NEXT: .LBB107_15: # %else26
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
+; RV64ZVE32F-NEXT: .LBB107_20: # %else26
; RV64ZVE32F-NEXT: andi a2, a1, 1024
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB107_30
-; RV64ZVE32F-NEXT: .LBB107_16: # %else29
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_22
+; RV64ZVE32F-NEXT: # %bb.21: # %cond.load28
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v11, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 10
+; RV64ZVE32F-NEXT: .LBB107_22: # %else29
; RV64ZVE32F-NEXT: slli a2, a1, 52
-; RV64ZVE32F-NEXT: bltz a2, .LBB107_31
-; RV64ZVE32F-NEXT: .LBB107_17: # %else32
+; RV64ZVE32F-NEXT: bgez a2, .LBB107_24
+; RV64ZVE32F-NEXT: # %bb.23: # %cond.load31
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 11
+; RV64ZVE32F-NEXT: .LBB107_24: # %else32
; RV64ZVE32F-NEXT: slli a2, a1, 51
-; RV64ZVE32F-NEXT: bltz a2, .LBB107_32
-; RV64ZVE32F-NEXT: .LBB107_18: # %else35
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: bgez a2, .LBB107_26
+; RV64ZVE32F-NEXT: # %bb.25: # %cond.load34
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 12
+; RV64ZVE32F-NEXT: .LBB107_26: # %else35
; RV64ZVE32F-NEXT: slli a2, a1, 50
-; RV64ZVE32F-NEXT: bgez a2, .LBB107_20
-; RV64ZVE32F-NEXT: # %bb.19: # %cond.load37
+; RV64ZVE32F-NEXT: bgez a2, .LBB107_28
+; RV64ZVE32F-NEXT: # %bb.27: # %cond.load37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 13
-; RV64ZVE32F-NEXT: .LBB107_20: # %else38
+; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 13
+; RV64ZVE32F-NEXT: .LBB107_28: # %else38
; RV64ZVE32F-NEXT: slli a2, a1, 49
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
-; RV64ZVE32F-NEXT: bgez a2, .LBB107_22
-; RV64ZVE32F-NEXT: # %bb.21: # %cond.load40
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT: bgez a2, .LBB107_30
+; RV64ZVE32F-NEXT: # %bb.29: # %cond.load40
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 14
-; RV64ZVE32F-NEXT: .LBB107_22: # %else41
+; RV64ZVE32F-NEXT: .LBB107_30: # %else41
; RV64ZVE32F-NEXT: lui a2, 1048568
; RV64ZVE32F-NEXT: and a1, a1, a2
-; RV64ZVE32F-NEXT: beqz a1, .LBB107_24
-; RV64ZVE32F-NEXT: # %bb.23: # %cond.load43
+; RV64ZVE32F-NEXT: beqz a1, .LBB107_32
+; RV64ZVE32F-NEXT: # %bb.31: # %cond.load43
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -14137,81 +13257,10 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 15
-; RV64ZVE32F-NEXT: .LBB107_24: # %else44
+; RV64ZVE32F-NEXT: .LBB107_32: # %else44
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB107_25: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v11
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v12, 2
-; RV64ZVE32F-NEXT: j .LBB107_5
-; RV64ZVE32F-NEXT: .LBB107_26: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v11
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v11, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 3
-; RV64ZVE32F-NEXT: j .LBB107_6
-; RV64ZVE32F-NEXT: .LBB107_27: # %cond.load16
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v11, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 6
-; RV64ZVE32F-NEXT: j .LBB107_11
-; RV64ZVE32F-NEXT: .LBB107_28: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v10, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 7
-; RV64ZVE32F-NEXT: j .LBB107_12
-; RV64ZVE32F-NEXT: .LBB107_29: # %cond.load22
-; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v10, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 8
-; RV64ZVE32F-NEXT: j .LBB107_13
-; RV64ZVE32F-NEXT: .LBB107_30: # %cond.load28
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v11, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 10
-; RV64ZVE32F-NEXT: j .LBB107_16
-; RV64ZVE32F-NEXT: .LBB107_31: # %cond.load31
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 11
-; RV64ZVE32F-NEXT: j .LBB107_17
-; RV64ZVE32F-NEXT: .LBB107_32: # %cond.load34
-; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 12
-; RV64ZVE32F-NEXT: j .LBB107_18
%ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %idxs
%v = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs, i32 2, <16 x i1> %m, <16 x i8> %passthru)
ret <16 x i8> %v
@@ -14279,13 +13328,21 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB108_49
-; RV64ZVE32F-NEXT: .LBB108_5: # %else5
+; RV64ZVE32F-NEXT: beqz a2, .LBB108_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v13, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 2
+; RV64ZVE32F-NEXT: .LBB108_6: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB108_8
-; RV64ZVE32F-NEXT: # %bb.7: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v13
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
@@ -14318,9 +13375,16 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: .LBB108_12: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB108_51
-; RV64ZVE32F-NEXT: .LBB108_11: # %else17
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB108_14
+; RV64ZVE32F-NEXT: # %bb.13: # %cond.load16
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v13, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 6
+; RV64ZVE32F-NEXT: .LBB108_14: # %else17
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB108_16
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load19
@@ -14346,8 +13410,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 8
; RV64ZVE32F-NEXT: .LBB108_18: # %else23
; RV64ZVE32F-NEXT: andi a2, a1, 512
-; RV64ZVE32F-NEXT: beqz a2, .LBB108_15
-; RV64ZVE32F-NEXT: # %bb.14: # %cond.load25
+; RV64ZVE32F-NEXT: beqz a2, .LBB108_20
+; RV64ZVE32F-NEXT: # %bb.19: # %cond.load25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
@@ -14407,20 +13471,27 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: .LBB108_28: # %else38
; RV64ZVE32F-NEXT: slli a2, a1, 49
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v13, 2
-; RV64ZVE32F-NEXT: bltz a2, .LBB108_54
-; RV64ZVE32F-NEXT: .LBB108_24: # %else41
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_30
+; RV64ZVE32F-NEXT: # %bb.29: # %cond.load40
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v13, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 14
+; RV64ZVE32F-NEXT: .LBB108_30: # %else41
; RV64ZVE32F-NEXT: slli a2, a1, 48
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 16
; RV64ZVE32F-NEXT: bltz a2, .LBB108_63
-; RV64ZVE32F-NEXT: # %bb.31: # %else44
+; RV64ZVE32F-NEXT: .LBB108_31: # %else44
; RV64ZVE32F-NEXT: slli a2, a1, 47
; RV64ZVE32F-NEXT: bltz a2, .LBB108_64
; RV64ZVE32F-NEXT: .LBB108_32: # %else47
; RV64ZVE32F-NEXT: slli a2, a1, 46
-; RV64ZVE32F-NEXT: bgez a2, .LBB108_28
-; RV64ZVE32F-NEXT: # %bb.27: # %cond.load49
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_34
+; RV64ZVE32F-NEXT: # %bb.33: # %cond.load49
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -14432,16 +13503,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: .LBB108_34: # %else50
; RV64ZVE32F-NEXT: slli a2, a1, 45
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
-; RV64ZVE32F-NEXT: bltz a2, .LBB108_57
-; RV64ZVE32F-NEXT: .LBB108_29: # %else53
-; RV64ZVE32F-NEXT: slli a2, a1, 44
-; RV64ZVE32F-NEXT: bltz a2, .LBB108_58
-; RV64ZVE32F-NEXT: .LBB108_30: # %else56
-; RV64ZVE32F-NEXT: slli a2, a1, 43
-; RV64ZVE32F-NEXT: bgez a2, .LBB108_32
-; RV64ZVE32F-NEXT: # %bb.31: # %cond.load58
-; RV64ZVE32F-NEXT: vsetivli zero, 21, e8, m2, tu, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_36
+; RV64ZVE32F-NEXT: # %bb.35: # %cond.load52
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
@@ -14488,8 +13552,15 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: slli a2, a1, 41
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-NEXT: bltz a2, .LBB108_59
-; RV64ZVE32F-NEXT: .LBB108_35: # %else65
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_44
+; RV64ZVE32F-NEXT: # %bb.43: # %cond.load64
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 23, e8, m2, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 22
+; RV64ZVE32F-NEXT: .LBB108_44: # %else65
; RV64ZVE32F-NEXT: slli a2, a1, 40
; RV64ZVE32F-NEXT: bgez a2, .LBB108_46
; RV64ZVE32F-NEXT: # %bb.45: # %cond.load67
@@ -14515,8 +13586,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 24
; RV64ZVE32F-NEXT: .LBB108_48: # %else71
; RV64ZVE32F-NEXT: slli a2, a1, 38
-; RV64ZVE32F-NEXT: bgez a2, .LBB108_39
-; RV64ZVE32F-NEXT: # %bb.38: # %cond.load73
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_50
+; RV64ZVE32F-NEXT: # %bb.49: # %cond.load73
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -14528,18 +13599,19 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: .LBB108_50: # %else74
; RV64ZVE32F-NEXT: slli a2, a1, 37
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bltz a2, .LBB108_62
-; RV64ZVE32F-NEXT: .LBB108_40: # %else77
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_52
+; RV64ZVE32F-NEXT: # %bb.51: # %cond.load76
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 27, e8, m2, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 26
+; RV64ZVE32F-NEXT: .LBB108_52: # %else77
; RV64ZVE32F-NEXT: slli a2, a1, 36
-; RV64ZVE32F-NEXT: bltz a2, .LBB108_63
-; RV64ZVE32F-NEXT: .LBB108_41: # %else80
-; RV64ZVE32F-NEXT: slli a2, a1, 35
-; RV64ZVE32F-NEXT: bltz a2, .LBB108_64
-; RV64ZVE32F-NEXT: .LBB108_42: # %else83
-; RV64ZVE32F-NEXT: slli a2, a1, 34
-; RV64ZVE32F-NEXT: bgez a2, .LBB108_44
-; RV64ZVE32F-NEXT: # %bb.43: # %cond.load85
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_54
+; RV64ZVE32F-NEXT: # %bb.53: # %cond.load79
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -14602,146 +13674,24 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB108_49: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a2, v12
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v14, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2
-; RV64ZVE32F-NEXT: j .LBB108_5
-; RV64ZVE32F-NEXT: .LBB108_50: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v12
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
-; RV64ZVE32F-NEXT: j .LBB108_6
-; RV64ZVE32F-NEXT: .LBB108_51: # %cond.load16
-; RV64ZVE32F-NEXT: vmv.x.s a2, v13
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v14, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 6
-; RV64ZVE32F-NEXT: j .LBB108_11
-; RV64ZVE32F-NEXT: .LBB108_52: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v13
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v13, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 7
-; RV64ZVE32F-NEXT: j .LBB108_12
-; RV64ZVE32F-NEXT: .LBB108_53: # %cond.load22
-; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v12
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v13, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 8
-; RV64ZVE32F-NEXT: j .LBB108_13
-; RV64ZVE32F-NEXT: .LBB108_54: # %cond.load40
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 14
-; RV64ZVE32F-NEXT: j .LBB108_24
-; RV64ZVE32F-NEXT: .LBB108_55: # %cond.load43
+; RV64ZVE32F-NEXT: .LBB108_63: # %cond.load43
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 15
-; RV64ZVE32F-NEXT: j .LBB108_25
-; RV64ZVE32F-NEXT: .LBB108_56: # %cond.load46
+; RV64ZVE32F-NEXT: j .LBB108_31
+; RV64ZVE32F-NEXT: .LBB108_64: # %cond.load46
; RV64ZVE32F-NEXT: vsetivli zero, 17, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 16
-; RV64ZVE32F-NEXT: j .LBB108_26
-; RV64ZVE32F-NEXT: .LBB108_57: # %cond.load52
-; RV64ZVE32F-NEXT: vmv.x.s a2, v12
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v14, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 19, e8, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 18
-; RV64ZVE32F-NEXT: j .LBB108_29
-; RV64ZVE32F-NEXT: .LBB108_58: # %cond.load55
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v12
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 20, e8, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 19
-; RV64ZVE32F-NEXT: j .LBB108_30
-; RV64ZVE32F-NEXT: .LBB108_59: # %cond.load64
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 23, e8, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 22
-; RV64ZVE32F-NEXT: j .LBB108_35
-; RV64ZVE32F-NEXT: .LBB108_60: # %cond.load67
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 24, e8, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 23
-; RV64ZVE32F-NEXT: j .LBB108_36
-; RV64ZVE32F-NEXT: .LBB108_61: # %cond.load70
-; RV64ZVE32F-NEXT: vsetivli zero, 25, e8, m2, tu, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 24
-; RV64ZVE32F-NEXT: j .LBB108_37
-; RV64ZVE32F-NEXT: .LBB108_62: # %cond.load76
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 27, e8, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 26
-; RV64ZVE32F-NEXT: j .LBB108_40
-; RV64ZVE32F-NEXT: .LBB108_63: # %cond.load79
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 28, e8, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 27
-; RV64ZVE32F-NEXT: j .LBB108_41
-; RV64ZVE32F-NEXT: .LBB108_64: # %cond.load82
-; RV64ZVE32F-NEXT: vsetivli zero, 29, e8, m2, tu, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 28
-; RV64ZVE32F-NEXT: j .LBB108_42
+; RV64ZVE32F-NEXT: j .LBB108_32
%ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
%v = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %ptrs, i32 2, <32 x i1> %m, <32 x i8> %passthru)
ret <32 x i8> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index ec9a9166c79bc..1d973d25c4a8b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -499,65 +499,64 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: .LBB9_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB9_12
-; RV64ZVE32F-NEXT: .LBB9_5: # %else4
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB9_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-NEXT: vse8.v v11, (a2)
+; RV64ZVE32F-NEXT: .LBB9_6: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB9_13
-; RV64ZVE32F-NEXT: .LBB9_6: # %else6
+; RV64ZVE32F-NEXT: beqz a2, .LBB9_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vse8.v v10, (a2)
+; RV64ZVE32F-NEXT: .LBB9_8: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB9_14
-; RV64ZVE32F-NEXT: .LBB9_7: # %else8
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB9_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vse8.v v10, (a2)
+; RV64ZVE32F-NEXT: .LBB9_10: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB9_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a2, .LBB9_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-NEXT: vse8.v v9, (a2)
-; RV64ZVE32F-NEXT: .LBB9_9: # %else10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vse8.v v10, (a2)
+; RV64ZVE32F-NEXT: .LBB9_12: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB9_15
-; RV64ZVE32F-NEXT: .LBB9_10: # %else12
+; RV64ZVE32F-NEXT: .LBB9_13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB9_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB9_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
-; RV64ZVE32F-NEXT: vse8.v v11, (a2)
-; RV64ZVE32F-NEXT: j .LBB9_5
-; RV64ZVE32F-NEXT: .LBB9_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-NEXT: vse8.v v9, (a2)
-; RV64ZVE32F-NEXT: j .LBB9_6
-; RV64ZVE32F-NEXT: .LBB9_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: vse8.v v9, (a2)
-; RV64ZVE32F-NEXT: j .LBB9_7
; RV64ZVE32F-NEXT: .LBB9_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB9_10
+; RV64ZVE32F-NEXT: j .LBB9_13
; RV64ZVE32F-NEXT: .LBB9_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -1001,63 +1000,62 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: .LBB18_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB18_12
-; RV64ZVE32F-NEXT: .LBB18_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB18_13
-; RV64ZVE32F-NEXT: .LBB18_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB18_14
-; RV64ZVE32F-NEXT: .LBB18_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB18_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: .LBB18_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB18_15
-; RV64ZVE32F-NEXT: .LBB18_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB18_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB18_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB18_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: vse16.v v11, (a2)
-; RV64ZVE32F-NEXT: j .LBB18_5
-; RV64ZVE32F-NEXT: .LBB18_13: # %cond.store5
+; RV64ZVE32F-NEXT: .LBB18_6: # %else4
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB18_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: .LBB18_8: # %else6
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB18_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: j .LBB18_6
-; RV64ZVE32F-NEXT: .LBB18_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: .LBB18_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB18_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: j .LBB18_7
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: .LBB18_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB18_15
+; RV64ZVE32F-NEXT: .LBB18_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB18_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB18_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
@@ -1065,7 +1063,7 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB18_10
+; RV64ZVE32F-NEXT: j .LBB18_13
; RV64ZVE32F-NEXT: .LBB18_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -1127,63 +1125,62 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: .LBB19_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB19_12
-; RV64ZVE32F-NEXT: .LBB19_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB19_13
-; RV64ZVE32F-NEXT: .LBB19_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB19_14
-; RV64ZVE32F-NEXT: .LBB19_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB19_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: .LBB19_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB19_15
-; RV64ZVE32F-NEXT: .LBB19_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB19_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB19_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB19_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: vse16.v v11, (a2)
-; RV64ZVE32F-NEXT: j .LBB19_5
-; RV64ZVE32F-NEXT: .LBB19_13: # %cond.store5
+; RV64ZVE32F-NEXT: .LBB19_6: # %else4
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB19_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: .LBB19_8: # %else6
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB19_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: j .LBB19_6
-; RV64ZVE32F-NEXT: .LBB19_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: .LBB19_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB19_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: j .LBB19_7
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: .LBB19_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB19_15
+; RV64ZVE32F-NEXT: .LBB19_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB19_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB19_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
@@ -1191,7 +1188,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB19_10
+; RV64ZVE32F-NEXT: j .LBB19_13
; RV64ZVE32F-NEXT: .LBB19_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -1254,67 +1251,66 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: .LBB20_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB20_12
-; RV64ZVE32F-NEXT: .LBB20_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB20_13
-; RV64ZVE32F-NEXT: .LBB20_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB20_14
-; RV64ZVE32F-NEXT: .LBB20_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB20_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: andi a2, a2, 255
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: .LBB20_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB20_15
-; RV64ZVE32F-NEXT: .LBB20_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB20_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB20_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: andi a2, a2, 255
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB20_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: zext.b a2, a2
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: vse16.v v11, (a2)
-; RV64ZVE32F-NEXT: j .LBB20_5
-; RV64ZVE32F-NEXT: .LBB20_13: # %cond.store5
+; RV64ZVE32F-NEXT: .LBB20_6: # %else4
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB20_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: .LBB20_8: # %else6
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB20_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: andi a2, a2, 255
+; RV64ZVE32F-NEXT: zext.b a2, a2
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: j .LBB20_6
-; RV64ZVE32F-NEXT: .LBB20_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: .LBB20_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB20_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: zext.b a2, a2
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: j .LBB20_7
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: .LBB20_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB20_15
+; RV64ZVE32F-NEXT: .LBB20_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB20_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB20_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: zext.b a2, a2
@@ -1323,7 +1319,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB20_10
+; RV64ZVE32F-NEXT: j .LBB20_13
; RV64ZVE32F-NEXT: .LBB20_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -1385,70 +1381,69 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: .LBB21_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB21_12
-; RV64ZVE32F-NEXT: .LBB21_5: # %else4
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB21_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-NEXT: vse16.v v11, (a2)
+; RV64ZVE32F-NEXT: .LBB21_6: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB21_13
-; RV64ZVE32F-NEXT: .LBB21_6: # %else6
+; RV64ZVE32F-NEXT: beqz a2, .LBB21_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: .LBB21_8: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB21_14
-; RV64ZVE32F-NEXT: .LBB21_7: # %else8
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB21_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: .LBB21_10: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB21_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a2, .LBB21_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: .LBB21_9: # %else10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: .LBB21_12: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB21_15
-; RV64ZVE32F-NEXT: .LBB21_10: # %else12
+; RV64ZVE32F-NEXT: .LBB21_13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB21_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB21_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
-; RV64ZVE32F-NEXT: vse16.v v11, (a2)
-; RV64ZVE32F-NEXT: j .LBB21_5
-; RV64ZVE32F-NEXT: .LBB21_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: j .LBB21_6
-; RV64ZVE32F-NEXT: .LBB21_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: j .LBB21_7
-; RV64ZVE32F-NEXT: .LBB21_15: # %cond.store11
+; RV64ZVE32F-NEXT: .LBB21_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB21_10
+; RV64ZVE32F-NEXT: j .LBB21_13
; RV64ZVE32F-NEXT: .LBB21_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -1841,58 +1836,47 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: .LBB29_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB29_12
-; RV64ZVE32F-NEXT: .LBB29_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB29_13
-; RV64ZVE32F-NEXT: .LBB29_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB29_14
-; RV64ZVE32F-NEXT: .LBB29_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB29_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB29_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB29_15
-; RV64ZVE32F-NEXT: .LBB29_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB29_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB29_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB29_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB29_5
-; RV64ZVE32F-NEXT: .LBB29_13: # %cond.store5
+; RV64ZVE32F-NEXT: .LBB29_6: # %else4
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB29_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
-; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB29_6
-; RV64ZVE32F-NEXT: .LBB29_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
+; RV64ZVE32F-NEXT: vse32.v v11, (a2)
+; RV64ZVE32F-NEXT: .LBB29_8: # %else6
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB29_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: .LBB29_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB29_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -1900,7 +1884,16 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB29_7
+; RV64ZVE32F-NEXT: .LBB29_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB29_15
+; RV64ZVE32F-NEXT: .LBB29_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB29_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB29_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -1909,7 +1902,7 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB29_10
+; RV64ZVE32F-NEXT: j .LBB29_13
; RV64ZVE32F-NEXT: .LBB29_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -1971,58 +1964,47 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: .LBB30_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB30_12
-; RV64ZVE32F-NEXT: .LBB30_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB30_13
-; RV64ZVE32F-NEXT: .LBB30_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB30_14
-; RV64ZVE32F-NEXT: .LBB30_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB30_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB30_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB30_15
-; RV64ZVE32F-NEXT: .LBB30_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB30_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB30_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB30_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB30_5
-; RV64ZVE32F-NEXT: .LBB30_13: # %cond.store5
+; RV64ZVE32F-NEXT: .LBB30_6: # %else4
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB30_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
-; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB30_6
-; RV64ZVE32F-NEXT: .LBB30_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
+; RV64ZVE32F-NEXT: vse32.v v11, (a2)
+; RV64ZVE32F-NEXT: .LBB30_8: # %else6
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB30_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: .LBB30_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB30_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -2030,7 +2012,16 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB30_7
+; RV64ZVE32F-NEXT: .LBB30_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB30_15
+; RV64ZVE32F-NEXT: .LBB30_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB30_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB30_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -2039,7 +2030,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB30_10
+; RV64ZVE32F-NEXT: j .LBB30_13
; RV64ZVE32F-NEXT: .LBB30_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2105,49 +2096,21 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: .LBB31_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB31_12
-; RV64ZVE32F-NEXT: .LBB31_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB31_13
-; RV64ZVE32F-NEXT: .LBB31_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB31_14
-; RV64ZVE32F-NEXT: .LBB31_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB31_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: andi a2, a2, 255
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB31_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB31_15
-; RV64ZVE32F-NEXT: .LBB31_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB31_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB31_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: andi a2, a2, 255
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB31_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: zext.b a2, a2
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB31_5
-; RV64ZVE32F-NEXT: .LBB31_13: # %cond.store5
+; RV64ZVE32F-NEXT: .LBB31_6: # %else4
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB31_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -2155,20 +2118,46 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
-; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB31_6
-; RV64ZVE32F-NEXT: .LBB31_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v11
-; RV64ZVE32F-NEXT: andi a2, a2, 255
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
+; RV64ZVE32F-NEXT: vse32.v v11, (a2)
+; RV64ZVE32F-NEXT: .LBB31_8: # %else6
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB31_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: zext.b a2, a2
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB31_7
+; RV64ZVE32F-NEXT: .LBB31_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB31_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: .LBB31_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB31_15
+; RV64ZVE32F-NEXT: .LBB31_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB31_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB31_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: zext.b a2, a2
@@ -2178,7 +2167,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB31_10
+; RV64ZVE32F-NEXT: j .LBB31_13
; RV64ZVE32F-NEXT: .LBB31_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2244,66 +2233,64 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: .LBB32_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB32_12
-; RV64ZVE32F-NEXT: .LBB32_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB32_13
-; RV64ZVE32F-NEXT: .LBB32_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB32_14
-; RV64ZVE32F-NEXT: .LBB32_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB32_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB32_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB32_15
-; RV64ZVE32F-NEXT: .LBB32_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB32_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB32_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB32_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB32_5
-; RV64ZVE32F-NEXT: .LBB32_13: # %cond.store5
+; RV64ZVE32F-NEXT: .LBB32_6: # %else4
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB32_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
-; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB32_6
-; RV64ZVE32F-NEXT: .LBB32_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
+; RV64ZVE32F-NEXT: vse32.v v11, (a2)
+; RV64ZVE32F-NEXT: .LBB32_8: # %else6
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB32_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB32_7
+; RV64ZVE32F-NEXT: .LBB32_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB32_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: .LBB32_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB32_15
+; RV64ZVE32F-NEXT: .LBB32_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB32_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB32_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -2312,7 +2299,7 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB32_10
+; RV64ZVE32F-NEXT: j .LBB32_13
; RV64ZVE32F-NEXT: .LBB32_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2376,76 +2363,74 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: .LBB33_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB33_12
-; RV64ZVE32F-NEXT: .LBB33_5: # %else4
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB33_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: .LBB33_6: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB33_13
-; RV64ZVE32F-NEXT: .LBB33_6: # %else6
+; RV64ZVE32F-NEXT: beqz a2, .LBB33_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
+; RV64ZVE32F-NEXT: vse32.v v11, (a2)
+; RV64ZVE32F-NEXT: .LBB33_8: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB33_14
-; RV64ZVE32F-NEXT: .LBB33_7: # %else8
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB33_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: .LBB33_10: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB33_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a2, .LBB33_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB33_9: # %else10
+; RV64ZVE32F-NEXT: .LBB33_12: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB33_15
-; RV64ZVE32F-NEXT: .LBB33_10: # %else12
+; RV64ZVE32F-NEXT: .LBB33_13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB33_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB33_12: # %cond.store3
+; RV64ZVE32F-NEXT: .LBB33_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB33_5
-; RV64ZVE32F-NEXT: .LBB33_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v11
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
-; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB33_6
-; RV64ZVE32F-NEXT: .LBB33_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v11
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB33_7
-; RV64ZVE32F-NEXT: .LBB33_15: # %cond.store11
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB33_10
-; RV64ZVE32F-NEXT: .LBB33_16: # %cond.store13
+; RV64ZVE32F-NEXT: j .LBB33_13
+; RV64ZVE32F-NEXT: .LBB33_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
@@ -2511,40 +2496,10 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: .LBB34_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB34_12
-; RV64ZVE32F-NEXT: .LBB34_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB34_13
-; RV64ZVE32F-NEXT: .LBB34_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB34_14
-; RV64ZVE32F-NEXT: .LBB34_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB34_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: slli a2, a2, 48
-; RV64ZVE32F-NEXT: srli a2, a2, 46
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB34_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB34_15
-; RV64ZVE32F-NEXT: .LBB34_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB34_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB34_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB34_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -2552,8 +2507,10 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB34_5
-; RV64ZVE32F-NEXT: .LBB34_13: # %cond.store5
+; RV64ZVE32F-NEXT: .LBB34_6: # %else4
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB34_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -2561,12 +2518,15 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
-; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB34_6
-; RV64ZVE32F-NEXT: .LBB34_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
+; RV64ZVE32F-NEXT: vse32.v v11, (a2)
+; RV64ZVE32F-NEXT: .LBB34_8: # %else6
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB34_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -2574,7 +2534,30 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB34_7
+; RV64ZVE32F-NEXT: .LBB34_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB34_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: .LBB34_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB34_15
+; RV64ZVE32F-NEXT: .LBB34_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB34_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB34_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 48
@@ -2584,7 +2567,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB34_10
+; RV64ZVE32F-NEXT: j .LBB34_13
; RV64ZVE32F-NEXT: .LBB34_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2645,63 +2628,64 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs,
; RV64ZVE32F-NEXT: .LBB35_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB35_12
-; RV64ZVE32F-NEXT: .LBB35_5: # %else4
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB35_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vse32.v v13, (a2)
+; RV64ZVE32F-NEXT: .LBB35_6: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB35_13
-; RV64ZVE32F-NEXT: .LBB35_6: # %else6
+; RV64ZVE32F-NEXT: .LBB35_7: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB35_14
-; RV64ZVE32F-NEXT: .LBB35_7: # %else8
+; RV64ZVE32F-NEXT: .LBB35_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB35_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a2, .LBB35_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: .LBB35_9: # %else10
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: .LBB35_10: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB35_15
-; RV64ZVE32F-NEXT: .LBB35_10: # %else12
+; RV64ZVE32F-NEXT: .LBB35_11: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB35_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
+; RV64ZVE32F-NEXT: # %bb.12: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB35_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v11, (a2)
-; RV64ZVE32F-NEXT: j .LBB35_5
; RV64ZVE32F-NEXT: .LBB35_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v12, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
-; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB35_6
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
+; RV64ZVE32F-NEXT: vse32.v v11, (a2)
+; RV64ZVE32F-NEXT: j .LBB35_7
; RV64ZVE32F-NEXT: .LBB35_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 2
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB35_7
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: j .LBB35_8
; RV64ZVE32F-NEXT: .LBB35_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -2710,7 +2694,7 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB35_10
+; RV64ZVE32F-NEXT: j .LBB35_11
; RV64ZVE32F-NEXT: .LBB35_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -3405,46 +3389,9 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: .LBB42_4: # %else2
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB42_12
-; RV64ZVE32F-NEXT: .LBB42_5: # %else4
-; RV64ZVE32F-NEXT: andi a0, a5, 8
-; RV64ZVE32F-NEXT: bnez a0, .LBB42_13
-; RV64ZVE32F-NEXT: .LBB42_6: # %else6
-; RV64ZVE32F-NEXT: andi a0, a5, 16
-; RV64ZVE32F-NEXT: bnez a0, .LBB42_14
-; RV64ZVE32F-NEXT: .LBB42_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a5, 32
-; RV64ZVE32F-NEXT: beqz a0, .LBB42_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a4, 0(a0)
-; RV64ZVE32F-NEXT: .LBB42_9: # %else10
-; RV64ZVE32F-NEXT: andi a0, a5, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB42_15
-; RV64ZVE32F-NEXT: .LBB42_10: # %else12
-; RV64ZVE32F-NEXT: andi a0, a5, -128
-; RV64ZVE32F-NEXT: bnez a0, .LBB42_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB42_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB42_5
-; RV64ZVE32F-NEXT: .LBB42_13: # %cond.store5
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB42_6
-; RV64ZVE32F-NEXT: .LBB42_14: # %cond.store7
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB42_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
@@ -3468,13 +3415,32 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB42_7
+; RV64ZVE32F-NEXT: .LBB42_10: # %else8
+; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: beqz a0, .LBB42_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: add a0, a1, a0
+; RV64ZVE32F-NEXT: sd a5, 0(a0)
+; RV64ZVE32F-NEXT: .LBB42_12: # %else10
+; RV64ZVE32F-NEXT: andi a0, a4, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT: bnez a0, .LBB42_15
+; RV64ZVE32F-NEXT: .LBB42_13: # %else12
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: bnez a0, .LBB42_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB42_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB42_10
+; RV64ZVE32F-NEXT: j .LBB42_13
; RV64ZVE32F-NEXT: .LBB42_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -3659,46 +3625,9 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: .LBB43_4: # %else2
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB43_12
-; RV64ZVE32F-NEXT: .LBB43_5: # %else4
-; RV64ZVE32F-NEXT: andi a0, a5, 8
-; RV64ZVE32F-NEXT: bnez a0, .LBB43_13
-; RV64ZVE32F-NEXT: .LBB43_6: # %else6
-; RV64ZVE32F-NEXT: andi a0, a5, 16
-; RV64ZVE32F-NEXT: bnez a0, .LBB43_14
-; RV64ZVE32F-NEXT: .LBB43_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a5, 32
-; RV64ZVE32F-NEXT: beqz a0, .LBB43_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a4, 0(a0)
-; RV64ZVE32F-NEXT: .LBB43_9: # %else10
-; RV64ZVE32F-NEXT: andi a0, a5, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB43_15
-; RV64ZVE32F-NEXT: .LBB43_10: # %else12
-; RV64ZVE32F-NEXT: andi a0, a5, -128
-; RV64ZVE32F-NEXT: bnez a0, .LBB43_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB43_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB43_5
-; RV64ZVE32F-NEXT: .LBB43_13: # %cond.store5
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB43_6
-; RV64ZVE32F-NEXT: .LBB43_14: # %cond.store7
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB43_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
@@ -3722,13 +3651,32 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB43_7
+; RV64ZVE32F-NEXT: .LBB43_10: # %else8
+; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: beqz a0, .LBB43_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: add a0, a1, a0
+; RV64ZVE32F-NEXT: sd a5, 0(a0)
+; RV64ZVE32F-NEXT: .LBB43_12: # %else10
+; RV64ZVE32F-NEXT: andi a0, a4, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT: bnez a0, .LBB43_15
+; RV64ZVE32F-NEXT: .LBB43_13: # %else12
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: bnez a0, .LBB43_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB43_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB43_10
+; RV64ZVE32F-NEXT: j .LBB43_13
; RV64ZVE32F-NEXT: .LBB43_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -3917,62 +3865,63 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: .LBB44_4: # %else2
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB44_12
-; RV64ZVE32F-NEXT: .LBB44_5: # %else4
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB44_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-NEXT: zext.b a0, a0
+; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: add a0, a1, a0
+; RV64ZVE32F-NEXT: sd t0, 0(a0)
+; RV64ZVE32F-NEXT: .LBB44_6: # %else4
; RV64ZVE32F-NEXT: andi a0, a4, 8
-; RV64ZVE32F-NEXT: bnez a0, .LBB44_13
-; RV64ZVE32F-NEXT: .LBB44_6: # %else6
+; RV64ZVE32F-NEXT: beqz a0, .LBB44_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-NEXT: zext.b a0, a0
+; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: add a0, a1, a0
+; RV64ZVE32F-NEXT: sd a7, 0(a0)
+; RV64ZVE32F-NEXT: .LBB44_8: # %else6
; RV64ZVE32F-NEXT: andi a0, a4, 16
-; RV64ZVE32F-NEXT: bnez a0, .LBB44_14
-; RV64ZVE32F-NEXT: .LBB44_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a4, 32
-; RV64ZVE32F-NEXT: beqz a0, .LBB44_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz a0, .LBB44_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: andi a0, a0, 255
+; RV64ZVE32F-NEXT: zext.b a0, a0
+; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: add a0, a1, a0
+; RV64ZVE32F-NEXT: sd a6, 0(a0)
+; RV64ZVE32F-NEXT: .LBB44_10: # %else8
+; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: beqz a0, .LBB44_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-NEXT: zext.b a0, a0
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a5, 0(a0)
-; RV64ZVE32F-NEXT: .LBB44_9: # %else10
+; RV64ZVE32F-NEXT: .LBB44_12: # %else10
; RV64ZVE32F-NEXT: andi a0, a4, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB44_15
-; RV64ZVE32F-NEXT: .LBB44_10: # %else12
+; RV64ZVE32F-NEXT: .LBB44_13: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB44_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB44_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: andi a0, a0, 255
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB44_5
-; RV64ZVE32F-NEXT: .LBB44_13: # %cond.store5
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: andi a0, a0, 255
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB44_6
-; RV64ZVE32F-NEXT: .LBB44_14: # %cond.store7
-; RV64ZVE32F-NEXT: vmv.x.s a0, v9
-; RV64ZVE32F-NEXT: andi a0, a0, 255
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB44_7
; RV64ZVE32F-NEXT: .LBB44_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: zext.b a0, a0
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB44_10
+; RV64ZVE32F-NEXT: j .LBB44_13
; RV64ZVE32F-NEXT: .LBB44_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4161,46 +4110,9 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: .LBB45_4: # %else2
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB45_12
-; RV64ZVE32F-NEXT: .LBB45_5: # %else4
-; RV64ZVE32F-NEXT: andi a0, a5, 8
-; RV64ZVE32F-NEXT: bnez a0, .LBB45_13
-; RV64ZVE32F-NEXT: .LBB45_6: # %else6
-; RV64ZVE32F-NEXT: andi a0, a5, 16
-; RV64ZVE32F-NEXT: bnez a0, .LBB45_14
-; RV64ZVE32F-NEXT: .LBB45_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a5, 32
-; RV64ZVE32F-NEXT: beqz a0, .LBB45_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a4, 0(a0)
-; RV64ZVE32F-NEXT: .LBB45_9: # %else10
-; RV64ZVE32F-NEXT: andi a0, a5, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB45_15
-; RV64ZVE32F-NEXT: .LBB45_10: # %else12
-; RV64ZVE32F-NEXT: andi a0, a5, -128
-; RV64ZVE32F-NEXT: bnez a0, .LBB45_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB45_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB45_5
-; RV64ZVE32F-NEXT: .LBB45_13: # %cond.store5
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB45_6
-; RV64ZVE32F-NEXT: .LBB45_14: # %cond.store7
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB45_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
@@ -4224,13 +4136,32 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB45_7
+; RV64ZVE32F-NEXT: .LBB45_10: # %else8
+; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: beqz a0, .LBB45_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: add a0, a1, a0
+; RV64ZVE32F-NEXT: sd a5, 0(a0)
+; RV64ZVE32F-NEXT: .LBB45_12: # %else10
+; RV64ZVE32F-NEXT: andi a0, a4, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT: bnez a0, .LBB45_15
+; RV64ZVE32F-NEXT: .LBB45_13: # %else12
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: bnez a0, .LBB45_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB45_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB45_10
+; RV64ZVE32F-NEXT: j .LBB45_13
; RV64ZVE32F-NEXT: .LBB45_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4417,47 +4348,10 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: .LBB46_4: # %else2
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB46_12
-; RV64ZVE32F-NEXT: .LBB46_5: # %else4
-; RV64ZVE32F-NEXT: andi a0, a5, 8
-; RV64ZVE32F-NEXT: bnez a0, .LBB46_13
-; RV64ZVE32F-NEXT: .LBB46_6: # %else6
-; RV64ZVE32F-NEXT: andi a0, a5, 16
-; RV64ZVE32F-NEXT: bnez a0, .LBB46_14
-; RV64ZVE32F-NEXT: .LBB46_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a5, 32
-; RV64ZVE32F-NEXT: beqz a0, .LBB46_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a4, 0(a0)
-; RV64ZVE32F-NEXT: .LBB46_9: # %else10
-; RV64ZVE32F-NEXT: andi a0, a5, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB46_15
-; RV64ZVE32F-NEXT: .LBB46_10: # %else12
-; RV64ZVE32F-NEXT: andi a0, a5, -128
-; RV64ZVE32F-NEXT: bnez a0, .LBB46_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB46_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB46_5
-; RV64ZVE32F-NEXT: .LBB46_13: # %cond.store5
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB46_6
-; RV64ZVE32F-NEXT: .LBB46_14: # %cond.store7
-; RV64ZVE32F-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB46_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
@@ -4480,13 +4374,32 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB46_7
+; RV64ZVE32F-NEXT: .LBB46_10: # %else8
+; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: beqz a0, .LBB46_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: add a0, a1, a0
+; RV64ZVE32F-NEXT: sd a5, 0(a0)
+; RV64ZVE32F-NEXT: .LBB46_12: # %else10
+; RV64ZVE32F-NEXT: andi a0, a4, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT: bnez a0, .LBB46_15
+; RV64ZVE32F-NEXT: .LBB46_13: # %else12
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: bnez a0, .LBB46_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB46_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB46_10
+; RV64ZVE32F-NEXT: j .LBB46_13
; RV64ZVE32F-NEXT: .LBB46_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4677,49 +4590,9 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: .LBB47_4: # %else2
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB47_12
-; RV64ZVE32F-NEXT: .LBB47_5: # %else4
-; RV64ZVE32F-NEXT: andi a0, a4, 8
-; RV64ZVE32F-NEXT: bnez a0, .LBB47_13
-; RV64ZVE32F-NEXT: .LBB47_6: # %else6
-; RV64ZVE32F-NEXT: andi a0, a4, 16
-; RV64ZVE32F-NEXT: bnez a0, .LBB47_14
-; RV64ZVE32F-NEXT: .LBB47_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a4, 32
-; RV64ZVE32F-NEXT: beqz a0, .LBB47_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 48
-; RV64ZVE32F-NEXT: srli a0, a0, 45
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a5, 0(a0)
-; RV64ZVE32F-NEXT: .LBB47_9: # %else10
-; RV64ZVE32F-NEXT: andi a0, a4, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB47_15
-; RV64ZVE32F-NEXT: .LBB47_10: # %else12
-; RV64ZVE32F-NEXT: andi a0, a4, -128
-; RV64ZVE32F-NEXT: bnez a0, .LBB47_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB47_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 48
-; RV64ZVE32F-NEXT: srli a0, a0, 45
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB47_5
-; RV64ZVE32F-NEXT: .LBB47_13: # %cond.store5
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 48
-; RV64ZVE32F-NEXT: srli a0, a0, 45
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB47_6
-; RV64ZVE32F-NEXT: .LBB47_14: # %cond.store7
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB47_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 48
; RV64ZVE32F-NEXT: srli a0, a0, 45
@@ -4746,14 +4619,34 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB47_7
+; RV64ZVE32F-NEXT: .LBB47_10: # %else8
+; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: beqz a0, .LBB47_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-NEXT: slli a0, a0, 48
+; RV64ZVE32F-NEXT: srli a0, a0, 45
+; RV64ZVE32F-NEXT: add a0, a1, a0
+; RV64ZVE32F-NEXT: sd a5, 0(a0)
+; RV64ZVE32F-NEXT: .LBB47_12: # %else10
+; RV64ZVE32F-NEXT: andi a0, a4, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT: bnez a0, .LBB47_15
+; RV64ZVE32F-NEXT: .LBB47_13: # %else12
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: bnez a0, .LBB47_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB47_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 48
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB47_10
+; RV64ZVE32F-NEXT: j .LBB47_13
; RV64ZVE32F-NEXT: .LBB47_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4940,46 +4833,9 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV64ZVE32F-NEXT: .LBB48_4: # %else2
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB48_12
-; RV64ZVE32F-NEXT: .LBB48_5: # %else4
-; RV64ZVE32F-NEXT: andi a0, a5, 8
-; RV64ZVE32F-NEXT: bnez a0, .LBB48_13
-; RV64ZVE32F-NEXT: .LBB48_6: # %else6
-; RV64ZVE32F-NEXT: andi a0, a5, 16
-; RV64ZVE32F-NEXT: bnez a0, .LBB48_14
-; RV64ZVE32F-NEXT: .LBB48_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a5, 32
-; RV64ZVE32F-NEXT: beqz a0, .LBB48_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a4, 0(a0)
-; RV64ZVE32F-NEXT: .LBB48_9: # %else10
-; RV64ZVE32F-NEXT: andi a0, a5, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB48_15
-; RV64ZVE32F-NEXT: .LBB48_10: # %else12
-; RV64ZVE32F-NEXT: andi a0, a5, -128
-; RV64ZVE32F-NEXT: bnez a0, .LBB48_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB48_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB48_5
-; RV64ZVE32F-NEXT: .LBB48_13: # %cond.store5
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB48_6
-; RV64ZVE32F-NEXT: .LBB48_14: # %cond.store7
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB48_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
@@ -4989,13 +4845,13 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB48_13
-; RV64ZVE32F-NEXT: # %bb.7: # %else6
+; RV64ZVE32F-NEXT: .LBB48_7: # %else6
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB48_14
; RV64ZVE32F-NEXT: .LBB48_8: # %else8
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB48_10
-; RV64ZVE32F-NEXT: .LBB48_9: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
@@ -5007,10 +4863,10 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB48_15
-; RV64ZVE32F-NEXT: # %bb.11: # %else12
+; RV64ZVE32F-NEXT: .LBB48_11: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB48_16
-; RV64ZVE32F-NEXT: .LBB48_12: # %else14
+; RV64ZVE32F-NEXT: # %bb.12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB48_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -5019,21 +4875,20 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB48_8
+; RV64ZVE32F-NEXT: j .LBB48_7
; RV64ZVE32F-NEXT: .LBB48_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB48_7
+; RV64ZVE32F-NEXT: j .LBB48_8
; RV64ZVE32F-NEXT: .LBB48_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB48_10
+; RV64ZVE32F-NEXT: j .LBB48_11
; RV64ZVE32F-NEXT: .LBB48_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -5218,46 +5073,9 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: .LBB49_4: # %else2
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB49_12
-; RV64ZVE32F-NEXT: .LBB49_5: # %else4
-; RV64ZVE32F-NEXT: andi a0, a5, 8
-; RV64ZVE32F-NEXT: bnez a0, .LBB49_13
-; RV64ZVE32F-NEXT: .LBB49_6: # %else6
-; RV64ZVE32F-NEXT: andi a0, a5, 16
-; RV64ZVE32F-NEXT: bnez a0, .LBB49_14
-; RV64ZVE32F-NEXT: .LBB49_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a5, 32
-; RV64ZVE32F-NEXT: beqz a0, .LBB49_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a4, 0(a0)
-; RV64ZVE32F-NEXT: .LBB49_9: # %else10
-; RV64ZVE32F-NEXT: andi a0, a5, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB49_15
-; RV64ZVE32F-NEXT: .LBB49_10: # %else12
-; RV64ZVE32F-NEXT: andi a0, a5, -128
-; RV64ZVE32F-NEXT: bnez a0, .LBB49_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB49_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB49_5
-; RV64ZVE32F-NEXT: .LBB49_13: # %cond.store5
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 3
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB49_6
-; RV64ZVE32F-NEXT: .LBB49_14: # %cond.store7
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB49_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
@@ -5267,13 +5085,13 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB49_13
-; RV64ZVE32F-NEXT: # %bb.7: # %else6
+; RV64ZVE32F-NEXT: .LBB49_7: # %else6
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB49_14
; RV64ZVE32F-NEXT: .LBB49_8: # %else8
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB49_10
-; RV64ZVE32F-NEXT: .LBB49_9: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
@@ -5285,10 +5103,10 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB49_15
-; RV64ZVE32F-NEXT: # %bb.11: # %else12
+; RV64ZVE32F-NEXT: .LBB49_11: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB49_16
-; RV64ZVE32F-NEXT: .LBB49_12: # %else14
+; RV64ZVE32F-NEXT: # %bb.12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB49_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -5297,21 +5115,20 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB49_8
+; RV64ZVE32F-NEXT: j .LBB49_7
; RV64ZVE32F-NEXT: .LBB49_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB49_7
+; RV64ZVE32F-NEXT: j .LBB49_8
; RV64ZVE32F-NEXT: .LBB49_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB49_10
+; RV64ZVE32F-NEXT: j .LBB49_11
; RV64ZVE32F-NEXT: .LBB49_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -5499,49 +5316,9 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: .LBB50_4: # %else2
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB50_12
-; RV64ZVE32F-NEXT: .LBB50_5: # %else4
-; RV64ZVE32F-NEXT: andi a0, a5, 8
-; RV64ZVE32F-NEXT: bnez a0, .LBB50_13
-; RV64ZVE32F-NEXT: .LBB50_6: # %else6
-; RV64ZVE32F-NEXT: andi a0, a5, 16
-; RV64ZVE32F-NEXT: bnez a0, .LBB50_14
-; RV64ZVE32F-NEXT: .LBB50_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a5, 32
-; RV64ZVE32F-NEXT: beqz a0, .LBB50_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 32
-; RV64ZVE32F-NEXT: srli a0, a0, 29
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a4, 0(a0)
-; RV64ZVE32F-NEXT: .LBB50_9: # %else10
-; RV64ZVE32F-NEXT: andi a0, a5, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB50_15
-; RV64ZVE32F-NEXT: .LBB50_10: # %else12
-; RV64ZVE32F-NEXT: andi a0, a5, -128
-; RV64ZVE32F-NEXT: bnez a0, .LBB50_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB50_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 32
-; RV64ZVE32F-NEXT: srli a0, a0, 29
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB50_5
-; RV64ZVE32F-NEXT: .LBB50_13: # %cond.store5
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: slli a0, a0, 32
-; RV64ZVE32F-NEXT: srli a0, a0, 29
-; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB50_6
-; RV64ZVE32F-NEXT: .LBB50_14: # %cond.store7
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB50_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
@@ -5552,13 +5329,13 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB50_13
-; RV64ZVE32F-NEXT: # %bb.7: # %else6
+; RV64ZVE32F-NEXT: .LBB50_7: # %else6
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB50_14
; RV64ZVE32F-NEXT: .LBB50_8: # %else8
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB50_10
-; RV64ZVE32F-NEXT: .LBB50_9: # %cond.store9
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
@@ -5571,10 +5348,10 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB50_15
-; RV64ZVE32F-NEXT: # %bb.11: # %else12
+; RV64ZVE32F-NEXT: .LBB50_11: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB50_16
-; RV64ZVE32F-NEXT: .LBB50_12: # %else14
+; RV64ZVE32F-NEXT: # %bb.12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB50_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -5584,8 +5361,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB50_8
+; RV64ZVE32F-NEXT: j .LBB50_7
; RV64ZVE32F-NEXT: .LBB50_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -5593,14 +5369,14 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB50_7
+; RV64ZVE32F-NEXT: j .LBB50_8
; RV64ZVE32F-NEXT: .LBB50_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB50_10
+; RV64ZVE32F-NEXT: j .LBB50_11
; RV64ZVE32F-NEXT: .LBB50_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -6270,71 +6046,69 @@ define void @mscatter_baseidx_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8>
; RV64ZVE32F-NEXT: .LBB58_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB58_12
-; RV64ZVE32F-NEXT: .LBB58_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB58_13
-; RV64ZVE32F-NEXT: .LBB58_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB58_14
-; RV64ZVE32F-NEXT: .LBB58_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB58_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB58_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB58_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB58_15
-; RV64ZVE32F-NEXT: .LBB58_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB58_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB58_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: .LBB58_6: # %else4
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB58_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB58_5
-; RV64ZVE32F-NEXT: .LBB58_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: .LBB58_8: # %else6
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB58_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB58_6
-; RV64ZVE32F-NEXT: .LBB58_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: .LBB58_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB58_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB58_7
+; RV64ZVE32F-NEXT: .LBB58_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB58_15
+; RV64ZVE32F-NEXT: .LBB58_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB58_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB58_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
@@ -6344,7 +6118,7 @@ define void @mscatter_baseidx_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8>
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB58_10
+; RV64ZVE32F-NEXT: j .LBB58_13
; RV64ZVE32F-NEXT: .LBB58_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -6414,71 +6188,69 @@ define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: .LBB59_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB59_12
-; RV64ZVE32F-NEXT: .LBB59_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB59_13
-; RV64ZVE32F-NEXT: .LBB59_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB59_14
-; RV64ZVE32F-NEXT: .LBB59_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB59_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB59_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB59_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB59_15
-; RV64ZVE32F-NEXT: .LBB59_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB59_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB59_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: .LBB59_6: # %else4
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB59_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB59_5
-; RV64ZVE32F-NEXT: .LBB59_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: .LBB59_8: # %else6
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB59_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB59_6
-; RV64ZVE32F-NEXT: .LBB59_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: .LBB59_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB59_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB59_7
+; RV64ZVE32F-NEXT: .LBB59_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB59_15
+; RV64ZVE32F-NEXT: .LBB59_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB59_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB59_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
@@ -6488,7 +6260,7 @@ define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB59_10
+; RV64ZVE32F-NEXT: j .LBB59_13
; RV64ZVE32F-NEXT: .LBB59_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -6559,75 +6331,73 @@ define void @mscatter_baseidx_zext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: .LBB60_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB60_12
-; RV64ZVE32F-NEXT: .LBB60_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB60_13
-; RV64ZVE32F-NEXT: .LBB60_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB60_14
-; RV64ZVE32F-NEXT: .LBB60_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB60_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB60_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
-; RV64ZVE32F-NEXT: andi a2, a2, 255
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB60_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB60_15
-; RV64ZVE32F-NEXT: .LBB60_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB60_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB60_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: .LBB60_6: # %else4
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB60_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: zext.b a2, a2
-; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB60_5
-; RV64ZVE32F-NEXT: .LBB60_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: .LBB60_8: # %else6
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB60_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
-; RV64ZVE32F-NEXT: andi a2, a2, 255
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB60_6
-; RV64ZVE32F-NEXT: .LBB60_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: .LBB60_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB60_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: zext.b a2, a2
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB60_7
+; RV64ZVE32F-NEXT: .LBB60_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB60_15
+; RV64ZVE32F-NEXT: .LBB60_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB60_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB60_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
@@ -6638,7 +6408,7 @@ define void @mscatter_baseidx_zext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB60_10
+; RV64ZVE32F-NEXT: j .LBB60_13
; RV64ZVE32F-NEXT: .LBB60_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -6708,71 +6478,68 @@ define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %id
; RV64ZVE32F-NEXT: .LBB61_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB61_12
-; RV64ZVE32F-NEXT: .LBB61_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB61_13
-; RV64ZVE32F-NEXT: .LBB61_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB61_14
-; RV64ZVE32F-NEXT: .LBB61_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB61_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB61_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB61_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB61_15
-; RV64ZVE32F-NEXT: .LBB61_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB61_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB61_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: .LBB61_6: # %else4
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB61_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB61_5
-; RV64ZVE32F-NEXT: .LBB61_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: .LBB61_8: # %else6
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB61_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: slli a2, a2, 1
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB61_6
-; RV64ZVE32F-NEXT: .LBB61_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: .LBB61_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB61_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB61_7
+; RV64ZVE32F-NEXT: .LBB61_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB61_15
+; RV64ZVE32F-NEXT: .LBB61_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB61_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB61_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
@@ -6782,7 +6549,7 @@ define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %id
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB61_10
+; RV64ZVE32F-NEXT: j .LBB61_13
; RV64ZVE32F-NEXT: .LBB61_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -7324,63 +7091,62 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFH-NEXT: .LBB68_4: # %else2
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_12
-; RV64ZVE32F-ZVFH-NEXT: .LBB68_5: # %else4
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_13
-; RV64ZVE32F-ZVFH-NEXT: .LBB68_6: # %else6
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_14
-; RV64ZVE32F-ZVFH-NEXT: .LBB68_7: # %else8
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_9
-; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: .LBB68_9: # %else10
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_15
-; RV64ZVE32F-ZVFH-NEXT: .LBB68_10: # %else12
-; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB68_16
-; RV64ZVE32F-ZVFH-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-ZVFH-NEXT: ret
-; RV64ZVE32F-ZVFH-NEXT: .LBB68_12: # %cond.store3
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_6
+; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB68_5
-; RV64ZVE32F-ZVFH-NEXT: .LBB68_13: # %cond.store5
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_6: # %else4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_8
+; RV64ZVE32F-ZVFH-NEXT: # %bb.7: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_8: # %else6
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 4
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_10
+; RV64ZVE32F-ZVFH-NEXT: # %bb.9: # %cond.store7
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB68_6
-; RV64ZVE32F-ZVFH-NEXT: .LBB68_14: # %cond.store7
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_10: # %else8
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_12
+; RV64ZVE32F-ZVFH-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB68_7
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 5
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_12: # %else10
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_15
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_13: # %else12
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB68_16
+; RV64ZVE32F-ZVFH-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB68_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
@@ -7388,7 +7154,7 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB68_10
+; RV64ZVE32F-ZVFH-NEXT: j .LBB68_13
; RV64ZVE32F-ZVFH-NEXT: .LBB68_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
@@ -7431,71 +7197,69 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_4: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_12
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_5: # %else4
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_13
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_6: # %else6
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_14
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_7: # %else8
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_9
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_6
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_9: # %else10
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_15
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_10: # %else12
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB68_16
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-ZVFHMIN-NEXT: ret
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_12: # %cond.store3
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_6: # %else4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_8
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.7: # %cond.store5
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB68_5
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_13: # %cond.store5
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_8: # %else6
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_10
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.9: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB68_6
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_14: # %cond.store7
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_10: # %else8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_12
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB68_7
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_12: # %else10
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_15
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_13: # %else12
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB68_16
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
@@ -7505,7 +7269,7 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB68_10
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB68_13
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
@@ -7571,63 +7335,62 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: .LBB69_4: # %else2
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_12
-; RV64ZVE32F-ZVFH-NEXT: .LBB69_5: # %else4
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_13
-; RV64ZVE32F-ZVFH-NEXT: .LBB69_6: # %else6
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_14
-; RV64ZVE32F-ZVFH-NEXT: .LBB69_7: # %else8
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_9
-; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: .LBB69_9: # %else10
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_15
-; RV64ZVE32F-ZVFH-NEXT: .LBB69_10: # %else12
-; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB69_16
-; RV64ZVE32F-ZVFH-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-ZVFH-NEXT: ret
-; RV64ZVE32F-ZVFH-NEXT: .LBB69_12: # %cond.store3
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_6
+; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB69_5
-; RV64ZVE32F-ZVFH-NEXT: .LBB69_13: # %cond.store5
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_6: # %else4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_8
+; RV64ZVE32F-ZVFH-NEXT: # %bb.7: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_8: # %else6
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 4
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_10
+; RV64ZVE32F-ZVFH-NEXT: # %bb.9: # %cond.store7
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB69_6
-; RV64ZVE32F-ZVFH-NEXT: .LBB69_14: # %cond.store7
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_10: # %else8
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_12
+; RV64ZVE32F-ZVFH-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB69_7
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 5
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_12: # %else10
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_15
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_13: # %else12
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB69_16
+; RV64ZVE32F-ZVFH-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB69_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
@@ -7635,7 +7398,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB69_10
+; RV64ZVE32F-ZVFH-NEXT: j .LBB69_13
; RV64ZVE32F-ZVFH-NEXT: .LBB69_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
@@ -7678,71 +7441,69 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_4: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_12
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_5: # %else4
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_13
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_6: # %else6
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_14
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_7: # %else8
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_9
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_6
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_9: # %else10
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_15
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_10: # %else12
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB69_16
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-ZVFHMIN-NEXT: ret
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_12: # %cond.store3
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_6: # %else4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_8
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.7: # %cond.store5
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB69_5
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_13: # %cond.store5
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_8: # %else6
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_10
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.9: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB69_6
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_14: # %cond.store7
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_10: # %else8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_12
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB69_7
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_12: # %else10
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_15
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_13: # %else12
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB69_16
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
@@ -7752,7 +7513,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB69_10
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB69_13
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
@@ -7819,67 +7580,66 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: .LBB70_4: # %else2
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_12
-; RV64ZVE32F-ZVFH-NEXT: .LBB70_5: # %else4
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_13
-; RV64ZVE32F-ZVFH-NEXT: .LBB70_6: # %else6
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_14
-; RV64ZVE32F-ZVFH-NEXT: .LBB70_7: # %else8
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_9
-; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
-; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: .LBB70_9: # %else10
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_15
-; RV64ZVE32F-ZVFH-NEXT: .LBB70_10: # %else12
-; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB70_16
-; RV64ZVE32F-ZVFH-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-ZVFH-NEXT: ret
-; RV64ZVE32F-ZVFH-NEXT: .LBB70_12: # %cond.store3
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_6
+; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: zext.b a2, a2
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB70_5
-; RV64ZVE32F-ZVFH-NEXT: .LBB70_13: # %cond.store5
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_6: # %else4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_8
+; RV64ZVE32F-ZVFH-NEXT: # %bb.7: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: zext.b a2, a2
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_8: # %else6
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 4
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_10
+; RV64ZVE32F-ZVFH-NEXT: # %bb.9: # %cond.store7
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFH-NEXT: zext.b a2, a2
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB70_6
-; RV64ZVE32F-ZVFH-NEXT: .LBB70_14: # %cond.store7
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_10: # %else8
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_12
+; RV64ZVE32F-ZVFH-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: zext.b a2, a2
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB70_7
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 5
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_12: # %else10
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_15
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_13: # %else12
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB70_16
+; RV64ZVE32F-ZVFH-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB70_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: zext.b a2, a2
@@ -7888,7 +7648,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB70_10
+; RV64ZVE32F-ZVFH-NEXT: j .LBB70_13
; RV64ZVE32F-ZVFH-NEXT: .LBB70_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
@@ -7934,75 +7694,73 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_4: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_12
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_5: # %else4
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_13
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_6: # %else6
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_14
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_7: # %else8
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_9
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_6
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: zext.b a2, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_9: # %else10
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_15
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_10: # %else12
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_16
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-ZVFHMIN-NEXT: ret
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_12: # %cond.store3
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_6: # %else4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_8
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.7: # %cond.store5
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: zext.b a2, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_5
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_13: # %cond.store5
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_8: # %else6
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_10
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.9: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: zext.b a2, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_6
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_14: # %cond.store7
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_10: # %else8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_12
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: zext.b a2, a2
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_7
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_12: # %else10
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_15
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_13: # %else12
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_16
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
@@ -8013,7 +7771,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_10
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_13
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
@@ -8079,62 +7837,61 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFH-NEXT: .LBB71_4: # %else2
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_12
-; RV64ZVE32F-ZVFH-NEXT: .LBB71_5: # %else4
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_13
-; RV64ZVE32F-ZVFH-NEXT: .LBB71_6: # %else6
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_14
-; RV64ZVE32F-ZVFH-NEXT: .LBB71_7: # %else8
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_9
-; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: .LBB71_9: # %else10
-; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
-; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_15
-; RV64ZVE32F-ZVFH-NEXT: .LBB71_10: # %else12
-; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB71_16
-; RV64ZVE32F-ZVFH-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-ZVFH-NEXT: ret
-; RV64ZVE32F-ZVFH-NEXT: .LBB71_12: # %cond.store3
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_6
+; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB71_5
-; RV64ZVE32F-ZVFH-NEXT: .LBB71_13: # %cond.store5
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_6: # %else4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_8
+; RV64ZVE32F-ZVFH-NEXT: # %bb.7: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
-; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB71_6
-; RV64ZVE32F-ZVFH-NEXT: .LBB71_14: # %cond.store7
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_8: # %else6
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 4
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_10
+; RV64ZVE32F-ZVFH-NEXT: # %bb.9: # %cond.store7
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_10: # %else8
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_12
+; RV64ZVE32F-ZVFH-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
-; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB71_7
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 5
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_12: # %else10
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_15
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_13: # %else12
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB71_16
+; RV64ZVE32F-ZVFH-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB71_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
@@ -8142,7 +7899,7 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB71_10
+; RV64ZVE32F-ZVFH-NEXT: j .LBB71_13
; RV64ZVE32F-ZVFH-NEXT: .LBB71_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
@@ -8185,71 +7942,68 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_4: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_12
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_5: # %else4
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_13
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_6: # %else6
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_14
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_7: # %else8
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
-; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_9
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_6
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_9: # %else10
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_15
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_10: # %else12
-; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
-; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB71_16
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-ZVFHMIN-NEXT: ret
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_12: # %cond.store3
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_6: # %else4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_8
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.7: # %cond.store5
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB71_5
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_13: # %cond.store5
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_8: # %else6
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_10
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.9: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB71_6
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_14: # %cond.store7
-; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_10: # %else8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_12
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB71_7
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_12: # %else10
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_15
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_13: # %else12
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB71_16
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
@@ -8259,7 +8013,7 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB71_10
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB71_13
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
@@ -8600,58 +8354,47 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV64ZVE32F-NEXT: .LBB78_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB78_12
-; RV64ZVE32F-NEXT: .LBB78_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB78_13
-; RV64ZVE32F-NEXT: .LBB78_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB78_14
-; RV64ZVE32F-NEXT: .LBB78_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB78_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB78_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB78_15
-; RV64ZVE32F-NEXT: .LBB78_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB78_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB78_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB78_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB78_5
-; RV64ZVE32F-NEXT: .LBB78_13: # %cond.store5
+; RV64ZVE32F-NEXT: .LBB78_6: # %else4
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB78_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
-; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB78_6
-; RV64ZVE32F-NEXT: .LBB78_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
+; RV64ZVE32F-NEXT: vse32.v v11, (a2)
+; RV64ZVE32F-NEXT: .LBB78_8: # %else6
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB78_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: .LBB78_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB78_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -8659,7 +8402,16 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB78_7
+; RV64ZVE32F-NEXT: .LBB78_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB78_15
+; RV64ZVE32F-NEXT: .LBB78_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB78_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB78_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -8668,7 +8420,7 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB78_10
+; RV64ZVE32F-NEXT: j .LBB78_13
; RV64ZVE32F-NEXT: .LBB78_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -8730,58 +8482,47 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: .LBB79_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB79_12
-; RV64ZVE32F-NEXT: .LBB79_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB79_13
-; RV64ZVE32F-NEXT: .LBB79_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB79_14
-; RV64ZVE32F-NEXT: .LBB79_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB79_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB79_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB79_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB79_15
-; RV64ZVE32F-NEXT: .LBB79_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB79_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB79_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB79_5
-; RV64ZVE32F-NEXT: .LBB79_13: # %cond.store5
+; RV64ZVE32F-NEXT: .LBB79_6: # %else4
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB79_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
-; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB79_6
-; RV64ZVE32F-NEXT: .LBB79_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
+; RV64ZVE32F-NEXT: vse32.v v11, (a2)
+; RV64ZVE32F-NEXT: .LBB79_8: # %else6
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB79_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: .LBB79_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB79_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -8789,7 +8530,16 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB79_7
+; RV64ZVE32F-NEXT: .LBB79_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB79_15
+; RV64ZVE32F-NEXT: .LBB79_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB79_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB79_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -8798,7 +8548,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB79_10
+; RV64ZVE32F-NEXT: j .LBB79_13
; RV64ZVE32F-NEXT: .LBB79_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -8864,49 +8614,21 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: .LBB80_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB80_12
-; RV64ZVE32F-NEXT: .LBB80_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB80_13
-; RV64ZVE32F-NEXT: .LBB80_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB80_14
-; RV64ZVE32F-NEXT: .LBB80_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB80_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: andi a2, a2, 255
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB80_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB80_15
-; RV64ZVE32F-NEXT: .LBB80_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB80_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB80_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: andi a2, a2, 255
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB80_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: zext.b a2, a2
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB80_5
-; RV64ZVE32F-NEXT: .LBB80_13: # %cond.store5
+; RV64ZVE32F-NEXT: .LBB80_6: # %else4
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB80_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -8914,20 +8636,46 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
-; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB80_6
-; RV64ZVE32F-NEXT: .LBB80_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v11
-; RV64ZVE32F-NEXT: andi a2, a2, 255
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
+; RV64ZVE32F-NEXT: vse32.v v11, (a2)
+; RV64ZVE32F-NEXT: .LBB80_8: # %else6
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB80_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: zext.b a2, a2
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB80_7
+; RV64ZVE32F-NEXT: .LBB80_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB80_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: .LBB80_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB80_15
+; RV64ZVE32F-NEXT: .LBB80_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB80_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB80_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: zext.b a2, a2
@@ -8937,7 +8685,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB80_10
+; RV64ZVE32F-NEXT: j .LBB80_13
; RV64ZVE32F-NEXT: .LBB80_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -9003,66 +8751,64 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: .LBB81_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB81_12
-; RV64ZVE32F-NEXT: .LBB81_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB81_13
-; RV64ZVE32F-NEXT: .LBB81_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB81_14
-; RV64ZVE32F-NEXT: .LBB81_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB81_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB81_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB81_15
-; RV64ZVE32F-NEXT: .LBB81_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB81_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB81_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB81_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB81_5
-; RV64ZVE32F-NEXT: .LBB81_13: # %cond.store5
+; RV64ZVE32F-NEXT: .LBB81_6: # %else4
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB81_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
-; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB81_6
-; RV64ZVE32F-NEXT: .LBB81_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
+; RV64ZVE32F-NEXT: vse32.v v11, (a2)
+; RV64ZVE32F-NEXT: .LBB81_8: # %else6
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB81_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB81_7
+; RV64ZVE32F-NEXT: .LBB81_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB81_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: .LBB81_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB81_15
+; RV64ZVE32F-NEXT: .LBB81_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB81_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB81_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -9071,7 +8817,7 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB81_10
+; RV64ZVE32F-NEXT: j .LBB81_13
; RV64ZVE32F-NEXT: .LBB81_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -9135,66 +8881,64 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: .LBB82_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB82_12
-; RV64ZVE32F-NEXT: .LBB82_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB82_13
-; RV64ZVE32F-NEXT: .LBB82_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB82_14
-; RV64ZVE32F-NEXT: .LBB82_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB82_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB82_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB82_15
-; RV64ZVE32F-NEXT: .LBB82_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB82_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB82_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB82_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB82_5
-; RV64ZVE32F-NEXT: .LBB82_13: # %cond.store5
+; RV64ZVE32F-NEXT: .LBB82_6: # %else4
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB82_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
-; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB82_6
-; RV64ZVE32F-NEXT: .LBB82_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
+; RV64ZVE32F-NEXT: vse32.v v11, (a2)
+; RV64ZVE32F-NEXT: .LBB82_8: # %else6
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB82_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB82_7
+; RV64ZVE32F-NEXT: .LBB82_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB82_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: .LBB82_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB82_15
+; RV64ZVE32F-NEXT: .LBB82_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB82_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB82_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -9203,7 +8947,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB82_10
+; RV64ZVE32F-NEXT: j .LBB82_13
; RV64ZVE32F-NEXT: .LBB82_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -9270,40 +9014,10 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: .LBB83_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB83_12
-; RV64ZVE32F-NEXT: .LBB83_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB83_13
-; RV64ZVE32F-NEXT: .LBB83_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB83_14
-; RV64ZVE32F-NEXT: .LBB83_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB83_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: slli a2, a2, 48
-; RV64ZVE32F-NEXT: srli a2, a2, 46
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB83_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB83_15
-; RV64ZVE32F-NEXT: .LBB83_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB83_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB83_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -9311,8 +9025,10 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB83_5
-; RV64ZVE32F-NEXT: .LBB83_13: # %cond.store5
+; RV64ZVE32F-NEXT: .LBB83_6: # %else4
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -9320,12 +9036,15 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
-; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB83_6
-; RV64ZVE32F-NEXT: .LBB83_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
+; RV64ZVE32F-NEXT: vse32.v v11, (a2)
+; RV64ZVE32F-NEXT: .LBB83_8: # %else6
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -9333,7 +9052,30 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB83_7
+; RV64ZVE32F-NEXT: .LBB83_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: .LBB83_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB83_15
+; RV64ZVE32F-NEXT: .LBB83_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB83_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB83_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 48
@@ -9343,7 +9085,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB83_10
+; RV64ZVE32F-NEXT: j .LBB83_13
; RV64ZVE32F-NEXT: .LBB83_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -9404,63 +9146,64 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: .LBB84_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB84_12
-; RV64ZVE32F-NEXT: .LBB84_5: # %else4
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB84_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vse32.v v13, (a2)
+; RV64ZVE32F-NEXT: .LBB84_6: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB84_13
-; RV64ZVE32F-NEXT: .LBB84_6: # %else6
+; RV64ZVE32F-NEXT: .LBB84_7: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB84_14
-; RV64ZVE32F-NEXT: .LBB84_7: # %else8
+; RV64ZVE32F-NEXT: .LBB84_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB84_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a2, .LBB84_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: .LBB84_9: # %else10
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: .LBB84_10: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB84_15
-; RV64ZVE32F-NEXT: .LBB84_10: # %else12
+; RV64ZVE32F-NEXT: .LBB84_11: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB84_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
+; RV64ZVE32F-NEXT: # %bb.12: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB84_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v11, (a2)
-; RV64ZVE32F-NEXT: j .LBB84_5
; RV64ZVE32F-NEXT: .LBB84_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v12, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
-; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB84_6
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
+; RV64ZVE32F-NEXT: vse32.v v11, (a2)
+; RV64ZVE32F-NEXT: j .LBB84_7
; RV64ZVE32F-NEXT: .LBB84_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 2
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB84_7
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: j .LBB84_8
; RV64ZVE32F-NEXT: .LBB84_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -9469,7 +9212,7 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB84_10
+; RV64ZVE32F-NEXT: j .LBB84_11
; RV64ZVE32F-NEXT: .LBB84_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -10010,46 +9753,9 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8>
; RV64ZVE32F-NEXT: .LBB91_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB91_12
-; RV64ZVE32F-NEXT: .LBB91_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB91_13
-; RV64ZVE32F-NEXT: .LBB91_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB91_14
-; RV64ZVE32F-NEXT: .LBB91_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB91_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 3
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB91_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB91_15
-; RV64ZVE32F-NEXT: .LBB91_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB91_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB91_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 3
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB91_5
-; RV64ZVE32F-NEXT: .LBB91_13: # %cond.store5
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 3
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB91_6
-; RV64ZVE32F-NEXT: .LBB91_14: # %cond.store7
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB91_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -10073,13 +9779,32 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8>
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB91_7
+; RV64ZVE32F-NEXT: .LBB91_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB91_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 3
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
+; RV64ZVE32F-NEXT: .LBB91_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB91_15
+; RV64ZVE32F-NEXT: .LBB91_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB91_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB91_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB91_10
+; RV64ZVE32F-NEXT: j .LBB91_13
; RV64ZVE32F-NEXT: .LBB91_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -10215,46 +9940,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: .LBB92_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB92_12
-; RV64ZVE32F-NEXT: .LBB92_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB92_13
-; RV64ZVE32F-NEXT: .LBB92_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB92_14
-; RV64ZVE32F-NEXT: .LBB92_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 3
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB92_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB92_15
-; RV64ZVE32F-NEXT: .LBB92_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB92_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB92_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 3
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB92_5
-; RV64ZVE32F-NEXT: .LBB92_13: # %cond.store5
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 3
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB92_6
-; RV64ZVE32F-NEXT: .LBB92_14: # %cond.store7
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB92_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -10278,13 +9966,32 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB92_7
+; RV64ZVE32F-NEXT: .LBB92_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB92_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 3
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
+; RV64ZVE32F-NEXT: .LBB92_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB92_15
+; RV64ZVE32F-NEXT: .LBB92_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB92_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB92_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB92_10
+; RV64ZVE32F-NEXT: j .LBB92_13
; RV64ZVE32F-NEXT: .LBB92_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -10424,62 +10131,63 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: .LBB93_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB93_12
-; RV64ZVE32F-NEXT: .LBB93_5: # %else4
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB93_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: slli a2, a2, 3
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
+; RV64ZVE32F-NEXT: .LBB93_6: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB93_13
-; RV64ZVE32F-NEXT: .LBB93_6: # %else6
+; RV64ZVE32F-NEXT: beqz a2, .LBB93_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.store5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: slli a2, a2, 3
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
+; RV64ZVE32F-NEXT: .LBB93_8: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB93_14
-; RV64ZVE32F-NEXT: .LBB93_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB93_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB93_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: andi a2, a2, 255
+; RV64ZVE32F-NEXT: zext.b a2, a2
+; RV64ZVE32F-NEXT: slli a2, a2, 3
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
+; RV64ZVE32F-NEXT: .LBB93_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB93_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: zext.b a2, a2
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB93_9: # %else10
+; RV64ZVE32F-NEXT: .LBB93_12: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB93_15
-; RV64ZVE32F-NEXT: .LBB93_10: # %else12
+; RV64ZVE32F-NEXT: .LBB93_13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB93_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB93_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: andi a2, a2, 255
-; RV64ZVE32F-NEXT: slli a2, a2, 3
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB93_5
-; RV64ZVE32F-NEXT: .LBB93_13: # %cond.store5
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: andi a2, a2, 255
-; RV64ZVE32F-NEXT: slli a2, a2, 3
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB93_6
-; RV64ZVE32F-NEXT: .LBB93_14: # %cond.store7
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: andi a2, a2, 255
-; RV64ZVE32F-NEXT: slli a2, a2, 3
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB93_7
; RV64ZVE32F-NEXT: .LBB93_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: zext.b a2, a2
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB93_10
+; RV64ZVE32F-NEXT: j .LBB93_13
; RV64ZVE32F-NEXT: .LBB93_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -10619,46 +10327,9 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16
; RV64ZVE32F-NEXT: .LBB94_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB94_12
-; RV64ZVE32F-NEXT: .LBB94_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB94_13
-; RV64ZVE32F-NEXT: .LBB94_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB94_14
-; RV64ZVE32F-NEXT: .LBB94_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB94_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 3
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB94_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB94_15
-; RV64ZVE32F-NEXT: .LBB94_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB94_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB94_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 3
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB94_5
-; RV64ZVE32F-NEXT: .LBB94_13: # %cond.store5
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 3
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB94_6
-; RV64ZVE32F-NEXT: .LBB94_14: # %cond.store7
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB94_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -10682,13 +10353,32 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB94_7
+; RV64ZVE32F-NEXT: .LBB94_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB94_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 3
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
+; RV64ZVE32F-NEXT: .LBB94_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB94_15
+; RV64ZVE32F-NEXT: .LBB94_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB94_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB94_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB94_10
+; RV64ZVE32F-NEXT: j .LBB94_13
; RV64ZVE32F-NEXT: .LBB94_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -10826,46 +10516,9 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: .LBB95_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB95_12
-; RV64ZVE32F-NEXT: .LBB95_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB95_13
-; RV64ZVE32F-NEXT: .LBB95_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB95_14
-; RV64ZVE32F-NEXT: .LBB95_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB95_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 3
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB95_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB95_15
-; RV64ZVE32F-NEXT: .LBB95_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB95_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB95_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 3
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB95_5
-; RV64ZVE32F-NEXT: .LBB95_13: # %cond.store5
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 3
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB95_6
-; RV64ZVE32F-NEXT: .LBB95_14: # %cond.store7
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB95_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -10889,13 +10542,32 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB95_7
+; RV64ZVE32F-NEXT: .LBB95_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB95_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 3
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
+; RV64ZVE32F-NEXT: .LBB95_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB95_15
+; RV64ZVE32F-NEXT: .LBB95_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB95_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB95_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB95_10
+; RV64ZVE32F-NEXT: j .LBB95_13
; RV64ZVE32F-NEXT: .LBB95_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -11037,49 +10709,9 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: .LBB96_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB96_12
-; RV64ZVE32F-NEXT: .LBB96_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB96_13
-; RV64ZVE32F-NEXT: .LBB96_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB96_14
-; RV64ZVE32F-NEXT: .LBB96_7: # %else8
-; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB96_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 48
-; RV64ZVE32F-NEXT: srli a2, a2, 45
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB96_9: # %else10
-; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB96_15
-; RV64ZVE32F-NEXT: .LBB96_10: # %else12
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB96_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB96_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 48
-; RV64ZVE32F-NEXT: srli a2, a2, 45
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB96_5
-; RV64ZVE32F-NEXT: .LBB96_13: # %cond.store5
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 48
-; RV64ZVE32F-NEXT: srli a2, a2, 45
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB96_6
-; RV64ZVE32F-NEXT: .LBB96_14: # %cond.store7
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB96_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 45
@@ -11106,15 +10738,35 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: srli a2, a2, 45
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB96_7
-; RV64ZVE32F-NEXT: .LBB96_15: # %cond.store11
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: .LBB96_10: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB96_12
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 45
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB96_10
-; RV64ZVE32F-NEXT: .LBB96_16: # %cond.store13
+; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
+; RV64ZVE32F-NEXT: .LBB96_12: # %else10
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT: bnez a2, .LBB96_15
+; RV64ZVE32F-NEXT: .LBB96_13: # %else12
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB96_16
+; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: ret
+; RV64ZVE32F-NEXT: .LBB96_15: # %cond.store11
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 45
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
+; RV64ZVE32F-NEXT: j .LBB96_13
+; RV64ZVE32F-NEXT: .LBB96_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 48
@@ -11251,38 +10903,41 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32
; RV64ZVE32F-NEXT: .LBB97_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB97_12
-; RV64ZVE32F-NEXT: .LBB97_5: # %else4
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB97_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 3
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
+; RV64ZVE32F-NEXT: .LBB97_6: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB97_13
-; RV64ZVE32F-NEXT: .LBB97_6: # %else6
+; RV64ZVE32F-NEXT: .LBB97_7: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB97_14
-; RV64ZVE32F-NEXT: .LBB97_7: # %else8
+; RV64ZVE32F-NEXT: .LBB97_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB97_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: beqz a2, .LBB97_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB97_9: # %else10
+; RV64ZVE32F-NEXT: .LBB97_10: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB97_15
-; RV64ZVE32F-NEXT: .LBB97_10: # %else12
+; RV64ZVE32F-NEXT: .LBB97_11: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB97_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
+; RV64ZVE32F-NEXT: # %bb.12: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB97_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 3
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB97_5
; RV64ZVE32F-NEXT: .LBB97_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
@@ -11290,20 +10945,20 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB97_6
+; RV64ZVE32F-NEXT: j .LBB97_7
; RV64ZVE32F-NEXT: .LBB97_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB97_7
+; RV64ZVE32F-NEXT: j .LBB97_8
; RV64ZVE32F-NEXT: .LBB97_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB97_10
+; RV64ZVE32F-NEXT: j .LBB97_11
; RV64ZVE32F-NEXT: .LBB97_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -11439,38 +11094,41 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: .LBB98_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB98_12
-; RV64ZVE32F-NEXT: .LBB98_5: # %else4
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB98_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 3
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
+; RV64ZVE32F-NEXT: .LBB98_6: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB98_13
-; RV64ZVE32F-NEXT: .LBB98_6: # %else6
+; RV64ZVE32F-NEXT: .LBB98_7: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB98_14
-; RV64ZVE32F-NEXT: .LBB98_7: # %else8
+; RV64ZVE32F-NEXT: .LBB98_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB98_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: beqz a2, .LBB98_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB98_9: # %else10
+; RV64ZVE32F-NEXT: .LBB98_10: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB98_15
-; RV64ZVE32F-NEXT: .LBB98_10: # %else12
+; RV64ZVE32F-NEXT: .LBB98_11: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB98_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
+; RV64ZVE32F-NEXT: # %bb.12: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB98_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 3
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB98_5
; RV64ZVE32F-NEXT: .LBB98_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
@@ -11478,20 +11136,20 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB98_6
+; RV64ZVE32F-NEXT: j .LBB98_7
; RV64ZVE32F-NEXT: .LBB98_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB98_7
+; RV64ZVE32F-NEXT: j .LBB98_8
; RV64ZVE32F-NEXT: .LBB98_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB98_10
+; RV64ZVE32F-NEXT: j .LBB98_11
; RV64ZVE32F-NEXT: .LBB98_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -11630,40 +11288,43 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: .LBB99_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB99_12
-; RV64ZVE32F-NEXT: .LBB99_5: # %else4
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB99_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 32
+; RV64ZVE32F-NEXT: srli a2, a2, 29
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
+; RV64ZVE32F-NEXT: .LBB99_6: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB99_13
-; RV64ZVE32F-NEXT: .LBB99_6: # %else6
+; RV64ZVE32F-NEXT: .LBB99_7: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB99_14
-; RV64ZVE32F-NEXT: .LBB99_7: # %else8
+; RV64ZVE32F-NEXT: .LBB99_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB99_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store9
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: beqz a2, .LBB99_10
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB99_9: # %else10
+; RV64ZVE32F-NEXT: .LBB99_10: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB99_15
-; RV64ZVE32F-NEXT: .LBB99_10: # %else12
+; RV64ZVE32F-NEXT: .LBB99_11: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB99_16
-; RV64ZVE32F-NEXT: # %bb.11: # %else14
+; RV64ZVE32F-NEXT: # %bb.12: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB99_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 32
-; RV64ZVE32F-NEXT: srli a2, a2, 29
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB99_5
; RV64ZVE32F-NEXT: .LBB99_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
@@ -11672,7 +11333,7 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB99_6
+; RV64ZVE32F-NEXT: j .LBB99_7
; RV64ZVE32F-NEXT: .LBB99_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -11680,14 +11341,14 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB99_7
+; RV64ZVE32F-NEXT: j .LBB99_8
; RV64ZVE32F-NEXT: .LBB99_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB99_10
+; RV64ZVE32F-NEXT: j .LBB99_11
; RV64ZVE32F-NEXT: .LBB99_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -11941,16 +11602,9 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: .LBB101_4: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB101_25
-; RV64ZVE32F-NEXT: .LBB101_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB101_26
-; RV64ZVE32F-NEXT: .LBB101_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB101_8
-; RV64ZVE32F-NEXT: # %bb.7: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB101_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -11993,97 +11647,17 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB101_27
-; RV64ZVE32F-NEXT: .LBB101_11: # %else12
-; RV64ZVE32F-NEXT: andi a2, a1, 128
-; RV64ZVE32F-NEXT: bnez a2, .LBB101_28
-; RV64ZVE32F-NEXT: .LBB101_12: # %else14
-; RV64ZVE32F-NEXT: andi a2, a1, 256
-; RV64ZVE32F-NEXT: bnez a2, .LBB101_29
-; RV64ZVE32F-NEXT: .LBB101_13: # %else16
-; RV64ZVE32F-NEXT: andi a2, a1, 512
-; RV64ZVE32F-NEXT: beqz a2, .LBB101_15
-; RV64ZVE32F-NEXT: # %bb.14: # %cond.store17
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 9
-; RV64ZVE32F-NEXT: vse8.v v10, (a2)
-; RV64ZVE32F-NEXT: .LBB101_15: # %else18
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
-; RV64ZVE32F-NEXT: andi a2, a1, 1024
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB101_30
-; RV64ZVE32F-NEXT: .LBB101_16: # %else20
-; RV64ZVE32F-NEXT: slli a2, a1, 52
-; RV64ZVE32F-NEXT: bltz a2, .LBB101_31
-; RV64ZVE32F-NEXT: .LBB101_17: # %else22
-; RV64ZVE32F-NEXT: slli a2, a1, 51
-; RV64ZVE32F-NEXT: bltz a2, .LBB101_32
-; RV64ZVE32F-NEXT: .LBB101_18: # %else24
-; RV64ZVE32F-NEXT: slli a2, a1, 50
-; RV64ZVE32F-NEXT: bgez a2, .LBB101_20
-; RV64ZVE32F-NEXT: # %bb.19: # %cond.store25
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 13
-; RV64ZVE32F-NEXT: vse8.v v9, (a2)
-; RV64ZVE32F-NEXT: .LBB101_20: # %else26
-; RV64ZVE32F-NEXT: slli a2, a1, 49
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
-; RV64ZVE32F-NEXT: bgez a2, .LBB101_22
-; RV64ZVE32F-NEXT: # %bb.21: # %cond.store27
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 14
-; RV64ZVE32F-NEXT: vse8.v v10, (a2)
-; RV64ZVE32F-NEXT: .LBB101_22: # %else28
-; RV64ZVE32F-NEXT: lui a2, 1048568
-; RV64ZVE32F-NEXT: and a1, a1, a2
-; RV64ZVE32F-NEXT: beqz a1, .LBB101_24
-; RV64ZVE32F-NEXT: # %bb.23: # %cond.store29
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a1, v9
-; RV64ZVE32F-NEXT: add a0, a0, a1
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 15
-; RV64ZVE32F-NEXT: vse8.v v8, (a0)
-; RV64ZVE32F-NEXT: .LBB101_24: # %else30
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB101_25: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v11
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
-; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB101_5
-; RV64ZVE32F-NEXT: .LBB101_26: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v11
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
-; RV64ZVE32F-NEXT: vse8.v v11, (a2)
-; RV64ZVE32F-NEXT: j .LBB101_6
-; RV64ZVE32F-NEXT: .LBB101_27: # %cond.store11
+; RV64ZVE32F-NEXT: beqz a2, .LBB101_14
+; RV64ZVE32F-NEXT: # %bb.13: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 6
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
-; RV64ZVE32F-NEXT: j .LBB101_11
-; RV64ZVE32F-NEXT: .LBB101_28: # %cond.store13
+; RV64ZVE32F-NEXT: .LBB101_14: # %else12
+; RV64ZVE32F-NEXT: andi a2, a1, 128
+; RV64ZVE32F-NEXT: beqz a2, .LBB101_16
+; RV64ZVE32F-NEXT: # %bb.15: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -12091,23 +11665,43 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 7
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB101_12
-; RV64ZVE32F-NEXT: .LBB101_29: # %cond.store15
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: .LBB101_16: # %else14
+; RV64ZVE32F-NEXT: andi a2, a1, 256
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB101_18
+; RV64ZVE32F-NEXT: # %bb.17: # %cond.store15
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB101_13
-; RV64ZVE32F-NEXT: .LBB101_30: # %cond.store19
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: .LBB101_18: # %else16
+; RV64ZVE32F-NEXT: andi a2, a1, 512
+; RV64ZVE32F-NEXT: beqz a2, .LBB101_20
+; RV64ZVE32F-NEXT: # %bb.19: # %cond.store17
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 9
+; RV64ZVE32F-NEXT: vse8.v v10, (a2)
+; RV64ZVE32F-NEXT: .LBB101_20: # %else18
+; RV64ZVE32F-NEXT: andi a2, a1, 1024
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB101_22
+; RV64ZVE32F-NEXT: # %bb.21: # %cond.store19
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 10
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
-; RV64ZVE32F-NEXT: j .LBB101_16
-; RV64ZVE32F-NEXT: .LBB101_31: # %cond.store21
+; RV64ZVE32F-NEXT: .LBB101_22: # %else20
+; RV64ZVE32F-NEXT: slli a2, a1, 52
+; RV64ZVE32F-NEXT: bgez a2, .LBB101_24
+; RV64ZVE32F-NEXT: # %bb.23: # %cond.store21
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -12124,16 +11718,44 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 11
-; RV64ZVE32F-NEXT: vse8.v v9, (a2)
-; RV64ZVE32F-NEXT: j .LBB101_17
-; RV64ZVE32F-NEXT: .LBB101_32: # %cond.store23
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 12
+; RV64ZVE32F-NEXT: vse8.v v10, (a2)
+; RV64ZVE32F-NEXT: .LBB101_26: # %else24
+; RV64ZVE32F-NEXT: slli a2, a1, 50
+; RV64ZVE32F-NEXT: bgez a2, .LBB101_28
+; RV64ZVE32F-NEXT: # %bb.27: # %cond.store25
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 12
-; RV64ZVE32F-NEXT: vse8.v v9, (a2)
-; RV64ZVE32F-NEXT: j .LBB101_18
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 13
+; RV64ZVE32F-NEXT: vse8.v v10, (a2)
+; RV64ZVE32F-NEXT: .LBB101_28: # %else26
+; RV64ZVE32F-NEXT: slli a2, a1, 49
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-NEXT: bgez a2, .LBB101_30
+; RV64ZVE32F-NEXT: # %bb.29: # %cond.store27
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 14
+; RV64ZVE32F-NEXT: vse8.v v10, (a2)
+; RV64ZVE32F-NEXT: .LBB101_30: # %else28
+; RV64ZVE32F-NEXT: lui a2, 1048568
+; RV64ZVE32F-NEXT: and a1, a1, a2
+; RV64ZVE32F-NEXT: beqz a1, .LBB101_32
+; RV64ZVE32F-NEXT: # %bb.31: # %cond.store29
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a1, v9
+; RV64ZVE32F-NEXT: add a0, a0, a1
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 15
+; RV64ZVE32F-NEXT: vse8.v v8, (a0)
+; RV64ZVE32F-NEXT: .LBB101_32: # %else30
+; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %idxs
call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %val, <16 x ptr> %ptrs, i32 1, <16 x i1> %m)
ret void
@@ -12194,14 +11816,10 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB102_49
-; RV64ZVE32F-NEXT: .LBB102_5: # %else4
-; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB102_50
-; RV64ZVE32F-NEXT: .LBB102_6: # %else6
-; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB102_8
-; RV64ZVE32F-NEXT: # %bb.7: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a2, .LBB102_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 2
; RV64ZVE32F-NEXT: vse8.v v13, (a2)
@@ -12241,9 +11859,15 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: .LBB102_12: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB102_51
-; RV64ZVE32F-NEXT: .LBB102_11: # %else12
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB102_14
+; RV64ZVE32F-NEXT: # %bb.13: # %cond.store11
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 6
+; RV64ZVE32F-NEXT: vse8.v v13, (a2)
+; RV64ZVE32F-NEXT: .LBB102_14: # %else12
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB102_16
; RV64ZVE32F-NEXT: # %bb.15: # %cond.store13
@@ -12267,8 +11891,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vse8.v v13, (a2)
; RV64ZVE32F-NEXT: .LBB102_18: # %else16
; RV64ZVE32F-NEXT: andi a2, a1, 512
-; RV64ZVE32F-NEXT: beqz a2, .LBB102_15
-; RV64ZVE32F-NEXT: # %bb.14: # %cond.store17
+; RV64ZVE32F-NEXT: beqz a2, .LBB102_20
+; RV64ZVE32F-NEXT: # %bb.19: # %cond.store17
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
@@ -12323,20 +11947,26 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: .LBB102_28: # %else26
; RV64ZVE32F-NEXT: slli a2, a1, 49
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v13, 2
-; RV64ZVE32F-NEXT: bltz a2, .LBB102_54
-; RV64ZVE32F-NEXT: .LBB102_24: # %else28
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_30
+; RV64ZVE32F-NEXT: # %bb.29: # %cond.store27
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 14
+; RV64ZVE32F-NEXT: vse8.v v13, (a2)
+; RV64ZVE32F-NEXT: .LBB102_30: # %else28
; RV64ZVE32F-NEXT: slli a2, a1, 48
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 16
; RV64ZVE32F-NEXT: bltz a2, .LBB102_63
-; RV64ZVE32F-NEXT: # %bb.31: # %else30
+; RV64ZVE32F-NEXT: .LBB102_31: # %else30
; RV64ZVE32F-NEXT: slli a2, a1, 47
; RV64ZVE32F-NEXT: bltz a2, .LBB102_64
; RV64ZVE32F-NEXT: .LBB102_32: # %else32
; RV64ZVE32F-NEXT: slli a2, a1, 46
-; RV64ZVE32F-NEXT: bgez a2, .LBB102_28
-; RV64ZVE32F-NEXT: # %bb.27: # %cond.store33
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_34
+; RV64ZVE32F-NEXT: # %bb.33: # %cond.store33
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -12348,16 +11978,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: .LBB102_34: # %else34
; RV64ZVE32F-NEXT: slli a2, a1, 45
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2
-; RV64ZVE32F-NEXT: bltz a2, .LBB102_57
-; RV64ZVE32F-NEXT: .LBB102_29: # %else36
-; RV64ZVE32F-NEXT: slli a2, a1, 44
-; RV64ZVE32F-NEXT: bltz a2, .LBB102_58
-; RV64ZVE32F-NEXT: .LBB102_30: # %else38
-; RV64ZVE32F-NEXT: slli a2, a1, 43
-; RV64ZVE32F-NEXT: bgez a2, .LBB102_32
-; RV64ZVE32F-NEXT: # %bb.31: # %cond.store39
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_36
+; RV64ZVE32F-NEXT: # %bb.35: # %cond.store35
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
@@ -12404,212 +12027,142 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: slli a2, a1, 41
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 2
-; RV64ZVE32F-NEXT: bltz a2, .LBB102_59
-; RV64ZVE32F-NEXT: .LBB102_35: # %else44
-; RV64ZVE32F-NEXT: slli a2, a1, 40
-; RV64ZVE32F-NEXT: bltz a2, .LBB102_60
-; RV64ZVE32F-NEXT: .LBB102_36: # %else46
-; RV64ZVE32F-NEXT: slli a2, a1, 39
-; RV64ZVE32F-NEXT: bltz a2, .LBB102_61
-; RV64ZVE32F-NEXT: .LBB102_37: # %else48
-; RV64ZVE32F-NEXT: slli a2, a1, 38
-; RV64ZVE32F-NEXT: bgez a2, .LBB102_39
-; RV64ZVE32F-NEXT: # %bb.38: # %cond.store49
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_44
+; RV64ZVE32F-NEXT: # %bb.43: # %cond.store43
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 25
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 22
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB102_39: # %else50
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
-; RV64ZVE32F-NEXT: slli a2, a1, 37
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bltz a2, .LBB102_62
-; RV64ZVE32F-NEXT: .LBB102_40: # %else52
-; RV64ZVE32F-NEXT: slli a2, a1, 36
-; RV64ZVE32F-NEXT: bltz a2, .LBB102_63
-; RV64ZVE32F-NEXT: .LBB102_41: # %else54
-; RV64ZVE32F-NEXT: slli a2, a1, 35
-; RV64ZVE32F-NEXT: bltz a2, .LBB102_64
-; RV64ZVE32F-NEXT: .LBB102_42: # %else56
-; RV64ZVE32F-NEXT: slli a2, a1, 34
-; RV64ZVE32F-NEXT: bgez a2, .LBB102_44
-; RV64ZVE32F-NEXT: # %bb.43: # %cond.store57
+; RV64ZVE32F-NEXT: .LBB102_44: # %else44
+; RV64ZVE32F-NEXT: slli a2, a1, 40
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_46
+; RV64ZVE32F-NEXT: # %bb.45: # %cond.store45
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 29
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 23
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB102_44: # %else58
-; RV64ZVE32F-NEXT: slli a2, a1, 33
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bgez a2, .LBB102_46
-; RV64ZVE32F-NEXT: # %bb.45: # %cond.store59
+; RV64ZVE32F-NEXT: .LBB102_46: # %else46
+; RV64ZVE32F-NEXT: slli a2, a1, 39
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 8
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_48
+; RV64ZVE32F-NEXT: # %bb.47: # %cond.store47
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 30
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB102_46: # %else60
-; RV64ZVE32F-NEXT: lui a2, 524288
-; RV64ZVE32F-NEXT: and a1, a1, a2
-; RV64ZVE32F-NEXT: beqz a1, .LBB102_48
-; RV64ZVE32F-NEXT: # %bb.47: # %cond.store61
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a1, v10
-; RV64ZVE32F-NEXT: add a0, a0, a1
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 31
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vse8.v v8, (a0)
-; RV64ZVE32F-NEXT: .LBB102_48: # %else62
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB102_49: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a2, v12
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2
-; RV64ZVE32F-NEXT: vse8.v v14, (a2)
-; RV64ZVE32F-NEXT: j .LBB102_5
-; RV64ZVE32F-NEXT: .LBB102_50: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v12
-; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 24
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB102_6
-; RV64ZVE32F-NEXT: .LBB102_51: # %cond.store11
-; RV64ZVE32F-NEXT: vmv.x.s a2, v13
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 6
-; RV64ZVE32F-NEXT: vse8.v v14, (a2)
-; RV64ZVE32F-NEXT: j .LBB102_11
-; RV64ZVE32F-NEXT: .LBB102_52: # %cond.store13
+; RV64ZVE32F-NEXT: .LBB102_48: # %else48
+; RV64ZVE32F-NEXT: slli a2, a1, 38
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_50
+; RV64ZVE32F-NEXT: # %bb.49: # %cond.store49
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v13
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 7
-; RV64ZVE32F-NEXT: vse8.v v13, (a2)
-; RV64ZVE32F-NEXT: j .LBB102_12
-; RV64ZVE32F-NEXT: .LBB102_53: # %cond.store15
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v12
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 8
-; RV64ZVE32F-NEXT: vse8.v v13, (a2)
-; RV64ZVE32F-NEXT: j .LBB102_13
-; RV64ZVE32F-NEXT: .LBB102_54: # %cond.store27
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 14
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB102_24
-; RV64ZVE32F-NEXT: .LBB102_55: # %cond.store29
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
+; RV64ZVE32F-NEXT: .LBB102_50: # %else50
+; RV64ZVE32F-NEXT: slli a2, a1, 37
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_52
+; RV64ZVE32F-NEXT: # %bb.51: # %cond.store51
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 15
-; RV64ZVE32F-NEXT: vse8.v v11, (a2)
-; RV64ZVE32F-NEXT: j .LBB102_25
-; RV64ZVE32F-NEXT: .LBB102_56: # %cond.store31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 16
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 26
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB102_26
-; RV64ZVE32F-NEXT: .LBB102_57: # %cond.store35
-; RV64ZVE32F-NEXT: vmv.x.s a2, v12
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 18
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vse8.v v14, (a2)
-; RV64ZVE32F-NEXT: j .LBB102_29
-; RV64ZVE32F-NEXT: .LBB102_58: # %cond.store37
+; RV64ZVE32F-NEXT: .LBB102_52: # %else52
+; RV64ZVE32F-NEXT: slli a2, a1, 36
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_54
+; RV64ZVE32F-NEXT: # %bb.53: # %cond.store53
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 19
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 27
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB102_30
-; RV64ZVE32F-NEXT: .LBB102_59: # %cond.store43
-; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: .LBB102_54: # %else54
+; RV64ZVE32F-NEXT: slli a2, a1, 35
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_56
+; RV64ZVE32F-NEXT: # %bb.55: # %cond.store55
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 22
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 28
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB102_35
-; RV64ZVE32F-NEXT: .LBB102_60: # %cond.store45
+; RV64ZVE32F-NEXT: .LBB102_56: # %else56
+; RV64ZVE32F-NEXT: slli a2, a1, 34
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_58
+; RV64ZVE32F-NEXT: # %bb.57: # %cond.store57
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 23
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 29
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB102_36
-; RV64ZVE32F-NEXT: .LBB102_61: # %cond.store47
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: .LBB102_58: # %else58
+; RV64ZVE32F-NEXT: slli a2, a1, 33
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_60
+; RV64ZVE32F-NEXT: # %bb.59: # %cond.store59
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 24
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 30
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB102_37
-; RV64ZVE32F-NEXT: .LBB102_62: # %cond.store51
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: .LBB102_60: # %else60
+; RV64ZVE32F-NEXT: lui a2, 524288
+; RV64ZVE32F-NEXT: and a1, a1, a2
+; RV64ZVE32F-NEXT: beqz a1, .LBB102_62
+; RV64ZVE32F-NEXT: # %bb.61: # %cond.store61
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a1, v10
+; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 26
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB102_40
-; RV64ZVE32F-NEXT: .LBB102_63: # %cond.store53
+; RV64ZVE32F-NEXT: vse8.v v8, (a0)
+; RV64ZVE32F-NEXT: .LBB102_62: # %else62
+; RV64ZVE32F-NEXT: ret
+; RV64ZVE32F-NEXT: .LBB102_63: # %cond.store29
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 27
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB102_41
-; RV64ZVE32F-NEXT: .LBB102_64: # %cond.store55
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 15
+; RV64ZVE32F-NEXT: vse8.v v11, (a2)
+; RV64ZVE32F-NEXT: j .LBB102_31
+; RV64ZVE32F-NEXT: .LBB102_64: # %cond.store31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 28
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB102_42
+; RV64ZVE32F-NEXT: j .LBB102_32
%ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> %val, <32 x ptr> %ptrs, i32 1, <32 x i1> %m)
ret void
diff --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution.ll
index 7353acd7228cd..d6b89a66f5cc3 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution.ll
@@ -8,30 +8,30 @@ define ptr @foo(ptr %a0, ptr %a1, i64 %a2) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a4, a2, e8, m8, ta, ma
-; CHECK-NEXT: bne a4, a2, .LBB0_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
-; CHECK-NEXT: vle8.v v8, (a1)
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB0_2: # %if.then
+; CHECK-NEXT: beq a4, a2, .LBB0_4
+; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: sub a5, a2, a4
; CHECK-NEXT: mv a3, a0
-; CHECK-NEXT: .LBB0_3: # %do.body
+; CHECK-NEXT: .LBB0_2: # %do.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vse8.v v8, (a3)
; CHECK-NEXT: add a3, a3, a4
; CHECK-NEXT: add a1, a1, a4
-; CHECK-NEXT: bltu a3, a5, .LBB0_3
-; CHECK-NEXT: # %bb.4: # %do.end
+; CHECK-NEXT: bltu a3, a5, .LBB0_2
+; CHECK-NEXT: # %bb.3: # %do.end
; CHECK-NEXT: sub a2, a2, a3
; CHECK-NEXT: vsetvli a2, a2, e8, m8, ta, ma
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vse8.v v8, (a3)
; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_4:
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vle8.v v8, (a1)
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
entry:
%0 = ptrtoint ptr %a0 to i64
%1 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %a2, i64 0, i64 3)
>From 4e4ed6bbeb348690a3e461cbfa0e6dafcf1e6ca5 Mon Sep 17 00:00:00 2001
From: "Mikhail R. Gadelha" <mikhail at igalia.com>
Date: Thu, 10 Apr 2025 14:45:14 -0300
Subject: [PATCH 4/6] Create method to add the branch folding pass with the
option to enable tail merge
Signed-off-by: Mikhail R. Gadelha <mikhail at igalia.com>
---
llvm/include/llvm/CodeGen/Passes.h | 2 ++
llvm/lib/CodeGen/BranchFolding.cpp | 12 ++++++++++--
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 3 ++-
3 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index d214ab9306c2f..426e430b947f9 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -257,6 +257,8 @@ namespace llvm {
/// branches.
extern char &BranchFolderPassID;
+ MachineFunctionPass *createBranchFolderPass(bool EnableTailMerge);
+
/// BranchRelaxation - This pass replaces branches that need to jump further
/// than is supported by a branch instruction.
extern char &BranchRelaxationPassID;
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 6f5afbd2a996a..5d015d7313b5c 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -90,10 +90,13 @@ namespace {
/// BranchFolderPass - Wrap branch folder in a machine function pass.
class BranchFolderLegacy : public MachineFunctionPass {
+ bool EnableTailMerge;
+
public:
static char ID;
- explicit BranchFolderLegacy() : MachineFunctionPass(ID) {}
+ explicit BranchFolderLegacy(bool EnableTailMerge = true)
+ : MachineFunctionPass(ID), EnableTailMerge(EnableTailMerge) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -152,7 +155,8 @@ bool BranchFolderLegacy::runOnMachineFunction(MachineFunction &MF) {
// TailMerge can create jump into if branches that make CFG irreducible for
// HW that requires structurized CFG.
bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
- PassConfig->getEnableTailMerge();
+ PassConfig->getEnableTailMerge() &&
+ this->EnableTailMerge;
MBFIWrapper MBBFreqInfo(
getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
BranchFolder Folder(
@@ -2080,3 +2084,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
++NumHoist;
return true;
}
+
+MachineFunctionPass *llvm::createBranchFolderPass(bool EnableTailMerge = true) {
+ return new BranchFolderLegacy(EnableTailMerge);
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 63bd0f4c20497..543bed9281fc5 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -18,6 +18,7 @@
#include "RISCVTargetTransformInfo.h"
#include "TargetInfo/RISCVTargetInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BranchFoldingPass.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
@@ -570,7 +571,7 @@ void RISCVPassConfig::addPreEmitPass() {
addPass(createMachineCopyPropagationPass(true));
if (TM->getOptLevel() >= CodeGenOptLevel::Default)
addPass(createRISCVLateBranchOptPass());
- addPass(&BranchFolderPassID);
+ addPass(createBranchFolderPass(false));
addPass(&BranchRelaxationPassID);
addPass(createRISCVMakeCompressibleOptPass());
}
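
A note on how the new flag composes with the existing controls: after this patch, tail merging
only happens when the target does not require a structured CFG, the pass config allows it, and
the flag passed to createBranchFolderPass() is true. The standalone sketch below is not LLVM
code; the names are chosen here purely for illustration. It models that decision and the RISC-V
pre-emit call site added above, which passes false so branch folding runs without tail merging:

  // tail_merge_model.cpp - minimal model of the effective tail-merge decision.
  #include <cstdio>

  struct TailMergeControls {
    bool TargetRequiresStructuredCFG; // MF.getTarget().requiresStructuredCFG()
    bool PassConfigEnableTailMerge;   // PassConfig->getEnableTailMerge()
    bool PassCtorEnableTailMerge;     // argument given to createBranchFolderPass()
  };

  // All three controls must allow tail merging for it to be enabled.
  static bool effectiveEnableTailMerge(const TailMergeControls &C) {
    return !C.TargetRequiresStructuredCFG && C.PassConfigEnableTailMerge &&
           C.PassCtorEnableTailMerge;
  }

  int main() {
    // RISC-V addPreEmitPass() in this patch calls createBranchFolderPass(false),
    // so tail merging stays off even when the other two controls would allow it.
    TailMergeControls RISCVPreEmit = {false, true, false};
    std::printf("tail merge enabled: %d\n",
                effectiveEnableTailMerge(RISCVPreEmit));
    return 0;
  }

Built with any C++11 compiler, this prints "tail merge enabled: 0" for the RISC-V pre-emit
configuration; flipping the last field to true models a target that keeps the default behavior.
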
>From 6afb625b241efa05b7fa049cd195b36ede232c53 Mon Sep 17 00:00:00 2001
From: "Mikhail R. Gadelha" <mikhail at igalia.com>
Date: Thu, 10 Apr 2025 14:45:22 -0300
Subject: [PATCH 5/6] Update tests
Signed-off-by: Mikhail R. Gadelha <mikhail at igalia.com>
---
.../CodeGen/RISCV/GlobalISel/rotl-rotr.ll | 831 ++--
.../CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll | 37 +-
llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll | 267 +-
...lar-shift-by-byte-multiple-legalization.ll | 2787 ++++++------
llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll | 40 +-
llvm/test/CodeGen/RISCV/copyprop.ll | 15 +-
.../CodeGen/RISCV/double-maximum-minimum.ll | 63 +-
.../CodeGen/RISCV/float-maximum-minimum.ll | 56 +-
llvm/test/CodeGen/RISCV/forced-atomics.ll | 25 +-
llvm/test/CodeGen/RISCV/fpclamptosat.ll | 368 +-
.../CodeGen/RISCV/half-maximum-minimum.ll | 28 +-
llvm/test/CodeGen/RISCV/machine-pipeliner.ll | 18 +-
.../RISCV/reduce-unnecessary-extension.ll | 16 +-
.../test/CodeGen/RISCV/riscv-tail-dup-size.ll | 11 +-
llvm/test/CodeGen/RISCV/rv32zbb.ll | 14 +-
llvm/test/CodeGen/RISCV/rvv/copyprop.mir | 7 +-
llvm/test/CodeGen/RISCV/rvv/expandload.ll | 3996 ++++++++++++-----
.../RISCV/rvv/fixed-vectors-masked-gather.ll | 2571 ++++++-----
.../RISCV/rvv/fixed-vectors-masked-scatter.ll | 1542 ++++---
.../RISCV/rvv/fixed-vectors-unaligned.ll | 47 +-
.../CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 612 +--
.../RISCV/rvv/vsetvli-insert-crossbb.ll | 32 +-
llvm/test/CodeGen/RISCV/sadd_sat.ll | 42 +-
llvm/test/CodeGen/RISCV/sadd_sat_plus.ll | 42 +-
llvm/test/CodeGen/RISCV/shifts.ll | 5 +-
llvm/test/CodeGen/RISCV/simplify-condbr.ll | 19 +-
llvm/test/CodeGen/RISCV/ssub_sat.ll | 42 +-
llvm/test/CodeGen/RISCV/ssub_sat_plus.ll | 42 +-
28 files changed, 8207 insertions(+), 5368 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll
index da8678f9a9916..8a786fc9993d2 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll
@@ -296,43 +296,44 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: bltu a5, a4, .LBB3_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srl a6, a1, a5
-; RV32I-NEXT: j .LBB3_3
+; RV32I-NEXT: mv a3, a0
+; RV32I-NEXT: bnez a5, .LBB3_3
+; RV32I-NEXT: j .LBB3_4
; RV32I-NEXT: .LBB3_2:
; RV32I-NEXT: srl a3, a0, a2
; RV32I-NEXT: neg a6, a5
; RV32I-NEXT: sll a6, a1, a6
; RV32I-NEXT: or a6, a3, a6
-; RV32I-NEXT: .LBB3_3:
; RV32I-NEXT: mv a3, a0
-; RV32I-NEXT: beqz a5, .LBB3_5
-; RV32I-NEXT: # %bb.4:
+; RV32I-NEXT: beqz a5, .LBB3_4
+; RV32I-NEXT: .LBB3_3:
; RV32I-NEXT: mv a3, a6
-; RV32I-NEXT: .LBB3_5:
+; RV32I-NEXT: .LBB3_4:
; RV32I-NEXT: neg a6, a2
-; RV32I-NEXT: bltu a5, a4, .LBB3_9
-; RV32I-NEXT: # %bb.6:
+; RV32I-NEXT: bltu a5, a4, .LBB3_7
+; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: li a2, 0
-; RV32I-NEXT: .LBB3_7:
; RV32I-NEXT: andi a5, a6, 63
-; RV32I-NEXT: bgeu a5, a4, .LBB3_10
-; RV32I-NEXT: # %bb.8:
+; RV32I-NEXT: bgeu a5, a4, .LBB3_8
+; RV32I-NEXT: .LBB3_6:
; RV32I-NEXT: sll a4, a0, a6
; RV32I-NEXT: neg a7, a5
; RV32I-NEXT: srl a0, a0, a7
; RV32I-NEXT: sll a6, a1, a6
; RV32I-NEXT: or a0, a0, a6
-; RV32I-NEXT: bnez a5, .LBB3_11
-; RV32I-NEXT: j .LBB3_12
-; RV32I-NEXT: .LBB3_9:
+; RV32I-NEXT: bnez a5, .LBB3_9
+; RV32I-NEXT: j .LBB3_10
+; RV32I-NEXT: .LBB3_7:
; RV32I-NEXT: srl a2, a1, a2
-; RV32I-NEXT: j .LBB3_7
-; RV32I-NEXT: .LBB3_10:
+; RV32I-NEXT: andi a5, a6, 63
+; RV32I-NEXT: bltu a5, a4, .LBB3_6
+; RV32I-NEXT: .LBB3_8:
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: sll a0, a0, a5
-; RV32I-NEXT: beqz a5, .LBB3_12
-; RV32I-NEXT: .LBB3_11:
+; RV32I-NEXT: beqz a5, .LBB3_10
+; RV32I-NEXT: .LBB3_9:
; RV32I-NEXT: mv a1, a0
-; RV32I-NEXT: .LBB3_12:
+; RV32I-NEXT: .LBB3_10:
; RV32I-NEXT: or a0, a3, a4
; RV32I-NEXT: or a1, a2, a1
; RV32I-NEXT: ret
@@ -352,43 +353,44 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind {
; RV32ZBB-NEXT: bltu a5, a4, .LBB3_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: srl a6, a1, a5
-; RV32ZBB-NEXT: j .LBB3_3
+; RV32ZBB-NEXT: mv a3, a0
+; RV32ZBB-NEXT: bnez a5, .LBB3_3
+; RV32ZBB-NEXT: j .LBB3_4
; RV32ZBB-NEXT: .LBB3_2:
; RV32ZBB-NEXT: srl a3, a0, a2
; RV32ZBB-NEXT: neg a6, a5
; RV32ZBB-NEXT: sll a6, a1, a6
; RV32ZBB-NEXT: or a6, a3, a6
-; RV32ZBB-NEXT: .LBB3_3:
; RV32ZBB-NEXT: mv a3, a0
-; RV32ZBB-NEXT: beqz a5, .LBB3_5
-; RV32ZBB-NEXT: # %bb.4:
+; RV32ZBB-NEXT: beqz a5, .LBB3_4
+; RV32ZBB-NEXT: .LBB3_3:
; RV32ZBB-NEXT: mv a3, a6
-; RV32ZBB-NEXT: .LBB3_5:
+; RV32ZBB-NEXT: .LBB3_4:
; RV32ZBB-NEXT: neg a6, a2
-; RV32ZBB-NEXT: bltu a5, a4, .LBB3_9
-; RV32ZBB-NEXT: # %bb.6:
+; RV32ZBB-NEXT: bltu a5, a4, .LBB3_7
+; RV32ZBB-NEXT: # %bb.5:
; RV32ZBB-NEXT: li a2, 0
-; RV32ZBB-NEXT: .LBB3_7:
; RV32ZBB-NEXT: andi a5, a6, 63
-; RV32ZBB-NEXT: bgeu a5, a4, .LBB3_10
-; RV32ZBB-NEXT: # %bb.8:
+; RV32ZBB-NEXT: bgeu a5, a4, .LBB3_8
+; RV32ZBB-NEXT: .LBB3_6:
; RV32ZBB-NEXT: sll a4, a0, a6
; RV32ZBB-NEXT: neg a7, a5
; RV32ZBB-NEXT: srl a0, a0, a7
; RV32ZBB-NEXT: sll a6, a1, a6
; RV32ZBB-NEXT: or a0, a0, a6
-; RV32ZBB-NEXT: bnez a5, .LBB3_11
-; RV32ZBB-NEXT: j .LBB3_12
-; RV32ZBB-NEXT: .LBB3_9:
+; RV32ZBB-NEXT: bnez a5, .LBB3_9
+; RV32ZBB-NEXT: j .LBB3_10
+; RV32ZBB-NEXT: .LBB3_7:
; RV32ZBB-NEXT: srl a2, a1, a2
-; RV32ZBB-NEXT: j .LBB3_7
-; RV32ZBB-NEXT: .LBB3_10:
+; RV32ZBB-NEXT: andi a5, a6, 63
+; RV32ZBB-NEXT: bltu a5, a4, .LBB3_6
+; RV32ZBB-NEXT: .LBB3_8:
; RV32ZBB-NEXT: li a4, 0
; RV32ZBB-NEXT: sll a0, a0, a5
-; RV32ZBB-NEXT: beqz a5, .LBB3_12
-; RV32ZBB-NEXT: .LBB3_11:
+; RV32ZBB-NEXT: beqz a5, .LBB3_10
+; RV32ZBB-NEXT: .LBB3_9:
; RV32ZBB-NEXT: mv a1, a0
-; RV32ZBB-NEXT: .LBB3_12:
+; RV32ZBB-NEXT: .LBB3_10:
; RV32ZBB-NEXT: or a0, a3, a4
; RV32ZBB-NEXT: or a1, a2, a1
; RV32ZBB-NEXT: ret
@@ -405,43 +407,44 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind {
; RV32XTHEADBB-NEXT: bltu a5, a4, .LBB3_2
; RV32XTHEADBB-NEXT: # %bb.1:
; RV32XTHEADBB-NEXT: srl a6, a1, a5
-; RV32XTHEADBB-NEXT: j .LBB3_3
+; RV32XTHEADBB-NEXT: mv a3, a0
+; RV32XTHEADBB-NEXT: bnez a5, .LBB3_3
+; RV32XTHEADBB-NEXT: j .LBB3_4
; RV32XTHEADBB-NEXT: .LBB3_2:
; RV32XTHEADBB-NEXT: srl a3, a0, a2
; RV32XTHEADBB-NEXT: neg a6, a5
; RV32XTHEADBB-NEXT: sll a6, a1, a6
; RV32XTHEADBB-NEXT: or a6, a3, a6
-; RV32XTHEADBB-NEXT: .LBB3_3:
; RV32XTHEADBB-NEXT: mv a3, a0
-; RV32XTHEADBB-NEXT: beqz a5, .LBB3_5
-; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: beqz a5, .LBB3_4
+; RV32XTHEADBB-NEXT: .LBB3_3:
; RV32XTHEADBB-NEXT: mv a3, a6
-; RV32XTHEADBB-NEXT: .LBB3_5:
+; RV32XTHEADBB-NEXT: .LBB3_4:
; RV32XTHEADBB-NEXT: neg a6, a2
-; RV32XTHEADBB-NEXT: bltu a5, a4, .LBB3_9
-; RV32XTHEADBB-NEXT: # %bb.6:
+; RV32XTHEADBB-NEXT: bltu a5, a4, .LBB3_7
+; RV32XTHEADBB-NEXT: # %bb.5:
; RV32XTHEADBB-NEXT: li a2, 0
-; RV32XTHEADBB-NEXT: .LBB3_7:
; RV32XTHEADBB-NEXT: andi a5, a6, 63
-; RV32XTHEADBB-NEXT: bgeu a5, a4, .LBB3_10
-; RV32XTHEADBB-NEXT: # %bb.8:
+; RV32XTHEADBB-NEXT: bgeu a5, a4, .LBB3_8
+; RV32XTHEADBB-NEXT: .LBB3_6:
; RV32XTHEADBB-NEXT: sll a4, a0, a6
; RV32XTHEADBB-NEXT: neg a7, a5
; RV32XTHEADBB-NEXT: srl a0, a0, a7
; RV32XTHEADBB-NEXT: sll a6, a1, a6
; RV32XTHEADBB-NEXT: or a0, a0, a6
-; RV32XTHEADBB-NEXT: bnez a5, .LBB3_11
-; RV32XTHEADBB-NEXT: j .LBB3_12
-; RV32XTHEADBB-NEXT: .LBB3_9:
+; RV32XTHEADBB-NEXT: bnez a5, .LBB3_9
+; RV32XTHEADBB-NEXT: j .LBB3_10
+; RV32XTHEADBB-NEXT: .LBB3_7:
; RV32XTHEADBB-NEXT: srl a2, a1, a2
-; RV32XTHEADBB-NEXT: j .LBB3_7
-; RV32XTHEADBB-NEXT: .LBB3_10:
+; RV32XTHEADBB-NEXT: andi a5, a6, 63
+; RV32XTHEADBB-NEXT: bltu a5, a4, .LBB3_6
+; RV32XTHEADBB-NEXT: .LBB3_8:
; RV32XTHEADBB-NEXT: li a4, 0
; RV32XTHEADBB-NEXT: sll a0, a0, a5
-; RV32XTHEADBB-NEXT: beqz a5, .LBB3_12
-; RV32XTHEADBB-NEXT: .LBB3_11:
+; RV32XTHEADBB-NEXT: beqz a5, .LBB3_10
+; RV32XTHEADBB-NEXT: .LBB3_9:
; RV32XTHEADBB-NEXT: mv a1, a0
-; RV32XTHEADBB-NEXT: .LBB3_12:
+; RV32XTHEADBB-NEXT: .LBB3_10:
; RV32XTHEADBB-NEXT: or a0, a3, a4
; RV32XTHEADBB-NEXT: or a1, a2, a1
; RV32XTHEADBB-NEXT: ret
@@ -958,42 +961,43 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: sll a7, a0, a6
-; RV32I-NEXT: j .LBB11_3
+; RV32I-NEXT: mv a5, a1
+; RV32I-NEXT: bnez a6, .LBB11_3
+; RV32I-NEXT: j .LBB11_4
; RV32I-NEXT: .LBB11_2:
; RV32I-NEXT: sll a3, a0, a2
; RV32I-NEXT: neg a5, a6
; RV32I-NEXT: srl a5, a0, a5
; RV32I-NEXT: sll a7, a1, a2
; RV32I-NEXT: or a7, a5, a7
-; RV32I-NEXT: .LBB11_3:
; RV32I-NEXT: mv a5, a1
-; RV32I-NEXT: beqz a6, .LBB11_5
-; RV32I-NEXT: # %bb.4:
+; RV32I-NEXT: beqz a6, .LBB11_4
+; RV32I-NEXT: .LBB11_3:
; RV32I-NEXT: mv a5, a7
-; RV32I-NEXT: .LBB11_5:
+; RV32I-NEXT: .LBB11_4:
; RV32I-NEXT: neg a2, a2
; RV32I-NEXT: andi a6, a2, 63
-; RV32I-NEXT: bltu a6, a4, .LBB11_7
-; RV32I-NEXT: # %bb.6:
+; RV32I-NEXT: bltu a6, a4, .LBB11_6
+; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: srl a7, a1, a6
-; RV32I-NEXT: bnez a6, .LBB11_8
-; RV32I-NEXT: j .LBB11_9
-; RV32I-NEXT: .LBB11_7:
+; RV32I-NEXT: bnez a6, .LBB11_7
+; RV32I-NEXT: j .LBB11_8
+; RV32I-NEXT: .LBB11_6:
; RV32I-NEXT: srl a7, a0, a2
; RV32I-NEXT: neg t0, a6
; RV32I-NEXT: sll t0, a1, t0
; RV32I-NEXT: or a7, a7, t0
-; RV32I-NEXT: beqz a6, .LBB11_9
-; RV32I-NEXT: .LBB11_8:
+; RV32I-NEXT: beqz a6, .LBB11_8
+; RV32I-NEXT: .LBB11_7:
; RV32I-NEXT: mv a0, a7
-; RV32I-NEXT: .LBB11_9:
-; RV32I-NEXT: bltu a6, a4, .LBB11_11
-; RV32I-NEXT: # %bb.10:
+; RV32I-NEXT: .LBB11_8:
+; RV32I-NEXT: bltu a6, a4, .LBB11_10
+; RV32I-NEXT: # %bb.9:
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: j .LBB11_12
-; RV32I-NEXT: .LBB11_11:
+; RV32I-NEXT: j .LBB11_11
+; RV32I-NEXT: .LBB11_10:
; RV32I-NEXT: srl a1, a1, a2
-; RV32I-NEXT: .LBB11_12:
+; RV32I-NEXT: .LBB11_11:
; RV32I-NEXT: or a0, a3, a0
; RV32I-NEXT: or a1, a5, a1
; RV32I-NEXT: ret
@@ -1014,42 +1018,43 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: li a3, 0
; RV32ZBB-NEXT: sll a7, a0, a6
-; RV32ZBB-NEXT: j .LBB11_3
+; RV32ZBB-NEXT: mv a5, a1
+; RV32ZBB-NEXT: bnez a6, .LBB11_3
+; RV32ZBB-NEXT: j .LBB11_4
; RV32ZBB-NEXT: .LBB11_2:
; RV32ZBB-NEXT: sll a3, a0, a2
; RV32ZBB-NEXT: neg a5, a6
; RV32ZBB-NEXT: srl a5, a0, a5
; RV32ZBB-NEXT: sll a7, a1, a2
; RV32ZBB-NEXT: or a7, a5, a7
-; RV32ZBB-NEXT: .LBB11_3:
; RV32ZBB-NEXT: mv a5, a1
-; RV32ZBB-NEXT: beqz a6, .LBB11_5
-; RV32ZBB-NEXT: # %bb.4:
+; RV32ZBB-NEXT: beqz a6, .LBB11_4
+; RV32ZBB-NEXT: .LBB11_3:
; RV32ZBB-NEXT: mv a5, a7
-; RV32ZBB-NEXT: .LBB11_5:
+; RV32ZBB-NEXT: .LBB11_4:
; RV32ZBB-NEXT: neg a2, a2
; RV32ZBB-NEXT: andi a6, a2, 63
-; RV32ZBB-NEXT: bltu a6, a4, .LBB11_7
-; RV32ZBB-NEXT: # %bb.6:
+; RV32ZBB-NEXT: bltu a6, a4, .LBB11_6
+; RV32ZBB-NEXT: # %bb.5:
; RV32ZBB-NEXT: srl a7, a1, a6
-; RV32ZBB-NEXT: bnez a6, .LBB11_8
-; RV32ZBB-NEXT: j .LBB11_9
-; RV32ZBB-NEXT: .LBB11_7:
+; RV32ZBB-NEXT: bnez a6, .LBB11_7
+; RV32ZBB-NEXT: j .LBB11_8
+; RV32ZBB-NEXT: .LBB11_6:
; RV32ZBB-NEXT: srl a7, a0, a2
; RV32ZBB-NEXT: neg t0, a6
; RV32ZBB-NEXT: sll t0, a1, t0
; RV32ZBB-NEXT: or a7, a7, t0
-; RV32ZBB-NEXT: beqz a6, .LBB11_9
-; RV32ZBB-NEXT: .LBB11_8:
+; RV32ZBB-NEXT: beqz a6, .LBB11_8
+; RV32ZBB-NEXT: .LBB11_7:
; RV32ZBB-NEXT: mv a0, a7
-; RV32ZBB-NEXT: .LBB11_9:
-; RV32ZBB-NEXT: bltu a6, a4, .LBB11_11
-; RV32ZBB-NEXT: # %bb.10:
+; RV32ZBB-NEXT: .LBB11_8:
+; RV32ZBB-NEXT: bltu a6, a4, .LBB11_10
+; RV32ZBB-NEXT: # %bb.9:
; RV32ZBB-NEXT: li a1, 0
-; RV32ZBB-NEXT: j .LBB11_12
-; RV32ZBB-NEXT: .LBB11_11:
+; RV32ZBB-NEXT: j .LBB11_11
+; RV32ZBB-NEXT: .LBB11_10:
; RV32ZBB-NEXT: srl a1, a1, a2
-; RV32ZBB-NEXT: .LBB11_12:
+; RV32ZBB-NEXT: .LBB11_11:
; RV32ZBB-NEXT: or a0, a3, a0
; RV32ZBB-NEXT: or a1, a5, a1
; RV32ZBB-NEXT: ret
@@ -1070,42 +1075,43 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV32XTHEADBB-NEXT: # %bb.1:
; RV32XTHEADBB-NEXT: li a3, 0
; RV32XTHEADBB-NEXT: sll a7, a0, a6
-; RV32XTHEADBB-NEXT: j .LBB11_3
+; RV32XTHEADBB-NEXT: mv a5, a1
+; RV32XTHEADBB-NEXT: bnez a6, .LBB11_3
+; RV32XTHEADBB-NEXT: j .LBB11_4
; RV32XTHEADBB-NEXT: .LBB11_2:
; RV32XTHEADBB-NEXT: sll a3, a0, a2
; RV32XTHEADBB-NEXT: neg a5, a6
; RV32XTHEADBB-NEXT: srl a5, a0, a5
; RV32XTHEADBB-NEXT: sll a7, a1, a2
; RV32XTHEADBB-NEXT: or a7, a5, a7
-; RV32XTHEADBB-NEXT: .LBB11_3:
; RV32XTHEADBB-NEXT: mv a5, a1
-; RV32XTHEADBB-NEXT: beqz a6, .LBB11_5
-; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: beqz a6, .LBB11_4
+; RV32XTHEADBB-NEXT: .LBB11_3:
; RV32XTHEADBB-NEXT: mv a5, a7
-; RV32XTHEADBB-NEXT: .LBB11_5:
+; RV32XTHEADBB-NEXT: .LBB11_4:
; RV32XTHEADBB-NEXT: neg a2, a2
; RV32XTHEADBB-NEXT: andi a6, a2, 63
-; RV32XTHEADBB-NEXT: bltu a6, a4, .LBB11_7
-; RV32XTHEADBB-NEXT: # %bb.6:
+; RV32XTHEADBB-NEXT: bltu a6, a4, .LBB11_6
+; RV32XTHEADBB-NEXT: # %bb.5:
; RV32XTHEADBB-NEXT: srl a7, a1, a6
-; RV32XTHEADBB-NEXT: bnez a6, .LBB11_8
-; RV32XTHEADBB-NEXT: j .LBB11_9
-; RV32XTHEADBB-NEXT: .LBB11_7:
+; RV32XTHEADBB-NEXT: bnez a6, .LBB11_7
+; RV32XTHEADBB-NEXT: j .LBB11_8
+; RV32XTHEADBB-NEXT: .LBB11_6:
; RV32XTHEADBB-NEXT: srl a7, a0, a2
; RV32XTHEADBB-NEXT: neg t0, a6
; RV32XTHEADBB-NEXT: sll t0, a1, t0
; RV32XTHEADBB-NEXT: or a7, a7, t0
-; RV32XTHEADBB-NEXT: beqz a6, .LBB11_9
-; RV32XTHEADBB-NEXT: .LBB11_8:
+; RV32XTHEADBB-NEXT: beqz a6, .LBB11_8
+; RV32XTHEADBB-NEXT: .LBB11_7:
; RV32XTHEADBB-NEXT: mv a0, a7
-; RV32XTHEADBB-NEXT: .LBB11_9:
-; RV32XTHEADBB-NEXT: bltu a6, a4, .LBB11_11
-; RV32XTHEADBB-NEXT: # %bb.10:
+; RV32XTHEADBB-NEXT: .LBB11_8:
+; RV32XTHEADBB-NEXT: bltu a6, a4, .LBB11_10
+; RV32XTHEADBB-NEXT: # %bb.9:
; RV32XTHEADBB-NEXT: li a1, 0
-; RV32XTHEADBB-NEXT: j .LBB11_12
-; RV32XTHEADBB-NEXT: .LBB11_11:
+; RV32XTHEADBB-NEXT: j .LBB11_11
+; RV32XTHEADBB-NEXT: .LBB11_10:
; RV32XTHEADBB-NEXT: srl a1, a1, a2
-; RV32XTHEADBB-NEXT: .LBB11_12:
+; RV32XTHEADBB-NEXT: .LBB11_11:
; RV32XTHEADBB-NEXT: or a0, a3, a0
; RV32XTHEADBB-NEXT: or a1, a5, a1
; RV32XTHEADBB-NEXT: ret
@@ -1400,43 +1406,44 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: bltu a4, a5, .LBB14_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srl a6, a1, a4
-; RV32I-NEXT: j .LBB14_3
+; RV32I-NEXT: mv a3, a0
+; RV32I-NEXT: bnez a4, .LBB14_3
+; RV32I-NEXT: j .LBB14_4
; RV32I-NEXT: .LBB14_2:
; RV32I-NEXT: srl a3, a0, a2
; RV32I-NEXT: neg a6, a4
; RV32I-NEXT: sll a6, a1, a6
; RV32I-NEXT: or a6, a3, a6
-; RV32I-NEXT: .LBB14_3:
; RV32I-NEXT: mv a3, a0
-; RV32I-NEXT: beqz a4, .LBB14_5
-; RV32I-NEXT: # %bb.4:
+; RV32I-NEXT: beqz a4, .LBB14_4
+; RV32I-NEXT: .LBB14_3:
; RV32I-NEXT: mv a3, a6
-; RV32I-NEXT: .LBB14_5:
-; RV32I-NEXT: bltu a4, a5, .LBB14_7
-; RV32I-NEXT: # %bb.6:
+; RV32I-NEXT: .LBB14_4:
+; RV32I-NEXT: bltu a4, a5, .LBB14_6
+; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: li a4, 0
-; RV32I-NEXT: j .LBB14_8
-; RV32I-NEXT: .LBB14_7:
+; RV32I-NEXT: j .LBB14_7
+; RV32I-NEXT: .LBB14_6:
; RV32I-NEXT: srl a4, a1, a2
-; RV32I-NEXT: .LBB14_8:
+; RV32I-NEXT: .LBB14_7:
; RV32I-NEXT: neg a7, a2
; RV32I-NEXT: andi a6, a7, 63
-; RV32I-NEXT: bltu a6, a5, .LBB14_10
-; RV32I-NEXT: # %bb.9:
+; RV32I-NEXT: bltu a6, a5, .LBB14_9
+; RV32I-NEXT: # %bb.8:
; RV32I-NEXT: li a2, 0
; RV32I-NEXT: sll a0, a0, a6
-; RV32I-NEXT: bnez a6, .LBB14_11
-; RV32I-NEXT: j .LBB14_12
-; RV32I-NEXT: .LBB14_10:
+; RV32I-NEXT: bnez a6, .LBB14_10
+; RV32I-NEXT: j .LBB14_11
+; RV32I-NEXT: .LBB14_9:
; RV32I-NEXT: sll a2, a0, a7
; RV32I-NEXT: neg a5, a6
; RV32I-NEXT: srl a0, a0, a5
; RV32I-NEXT: sll a5, a1, a7
; RV32I-NEXT: or a0, a0, a5
-; RV32I-NEXT: beqz a6, .LBB14_12
-; RV32I-NEXT: .LBB14_11:
+; RV32I-NEXT: beqz a6, .LBB14_11
+; RV32I-NEXT: .LBB14_10:
; RV32I-NEXT: mv a1, a0
-; RV32I-NEXT: .LBB14_12:
+; RV32I-NEXT: .LBB14_11:
; RV32I-NEXT: or a0, a3, a2
; RV32I-NEXT: or a1, a4, a1
; RV32I-NEXT: ret
@@ -1456,43 +1463,44 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV32ZBB-NEXT: bltu a4, a5, .LBB14_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: srl a6, a1, a4
-; RV32ZBB-NEXT: j .LBB14_3
+; RV32ZBB-NEXT: mv a3, a0
+; RV32ZBB-NEXT: bnez a4, .LBB14_3
+; RV32ZBB-NEXT: j .LBB14_4
; RV32ZBB-NEXT: .LBB14_2:
; RV32ZBB-NEXT: srl a3, a0, a2
; RV32ZBB-NEXT: neg a6, a4
; RV32ZBB-NEXT: sll a6, a1, a6
; RV32ZBB-NEXT: or a6, a3, a6
-; RV32ZBB-NEXT: .LBB14_3:
; RV32ZBB-NEXT: mv a3, a0
-; RV32ZBB-NEXT: beqz a4, .LBB14_5
-; RV32ZBB-NEXT: # %bb.4:
+; RV32ZBB-NEXT: beqz a4, .LBB14_4
+; RV32ZBB-NEXT: .LBB14_3:
; RV32ZBB-NEXT: mv a3, a6
-; RV32ZBB-NEXT: .LBB14_5:
-; RV32ZBB-NEXT: bltu a4, a5, .LBB14_7
-; RV32ZBB-NEXT: # %bb.6:
+; RV32ZBB-NEXT: .LBB14_4:
+; RV32ZBB-NEXT: bltu a4, a5, .LBB14_6
+; RV32ZBB-NEXT: # %bb.5:
; RV32ZBB-NEXT: li a4, 0
-; RV32ZBB-NEXT: j .LBB14_8
-; RV32ZBB-NEXT: .LBB14_7:
+; RV32ZBB-NEXT: j .LBB14_7
+; RV32ZBB-NEXT: .LBB14_6:
; RV32ZBB-NEXT: srl a4, a1, a2
-; RV32ZBB-NEXT: .LBB14_8:
+; RV32ZBB-NEXT: .LBB14_7:
; RV32ZBB-NEXT: neg a7, a2
; RV32ZBB-NEXT: andi a6, a7, 63
-; RV32ZBB-NEXT: bltu a6, a5, .LBB14_10
-; RV32ZBB-NEXT: # %bb.9:
+; RV32ZBB-NEXT: bltu a6, a5, .LBB14_9
+; RV32ZBB-NEXT: # %bb.8:
; RV32ZBB-NEXT: li a2, 0
; RV32ZBB-NEXT: sll a0, a0, a6
-; RV32ZBB-NEXT: bnez a6, .LBB14_11
-; RV32ZBB-NEXT: j .LBB14_12
-; RV32ZBB-NEXT: .LBB14_10:
+; RV32ZBB-NEXT: bnez a6, .LBB14_10
+; RV32ZBB-NEXT: j .LBB14_11
+; RV32ZBB-NEXT: .LBB14_9:
; RV32ZBB-NEXT: sll a2, a0, a7
; RV32ZBB-NEXT: neg a5, a6
; RV32ZBB-NEXT: srl a0, a0, a5
; RV32ZBB-NEXT: sll a5, a1, a7
; RV32ZBB-NEXT: or a0, a0, a5
-; RV32ZBB-NEXT: beqz a6, .LBB14_12
-; RV32ZBB-NEXT: .LBB14_11:
+; RV32ZBB-NEXT: beqz a6, .LBB14_11
+; RV32ZBB-NEXT: .LBB14_10:
; RV32ZBB-NEXT: mv a1, a0
-; RV32ZBB-NEXT: .LBB14_12:
+; RV32ZBB-NEXT: .LBB14_11:
; RV32ZBB-NEXT: or a0, a3, a2
; RV32ZBB-NEXT: or a1, a4, a1
; RV32ZBB-NEXT: ret
@@ -1512,43 +1520,44 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV32XTHEADBB-NEXT: bltu a4, a5, .LBB14_2
; RV32XTHEADBB-NEXT: # %bb.1:
; RV32XTHEADBB-NEXT: srl a6, a1, a4
-; RV32XTHEADBB-NEXT: j .LBB14_3
+; RV32XTHEADBB-NEXT: mv a3, a0
+; RV32XTHEADBB-NEXT: bnez a4, .LBB14_3
+; RV32XTHEADBB-NEXT: j .LBB14_4
; RV32XTHEADBB-NEXT: .LBB14_2:
; RV32XTHEADBB-NEXT: srl a3, a0, a2
; RV32XTHEADBB-NEXT: neg a6, a4
; RV32XTHEADBB-NEXT: sll a6, a1, a6
; RV32XTHEADBB-NEXT: or a6, a3, a6
-; RV32XTHEADBB-NEXT: .LBB14_3:
; RV32XTHEADBB-NEXT: mv a3, a0
-; RV32XTHEADBB-NEXT: beqz a4, .LBB14_5
-; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: beqz a4, .LBB14_4
+; RV32XTHEADBB-NEXT: .LBB14_3:
; RV32XTHEADBB-NEXT: mv a3, a6
-; RV32XTHEADBB-NEXT: .LBB14_5:
-; RV32XTHEADBB-NEXT: bltu a4, a5, .LBB14_7
-; RV32XTHEADBB-NEXT: # %bb.6:
+; RV32XTHEADBB-NEXT: .LBB14_4:
+; RV32XTHEADBB-NEXT: bltu a4, a5, .LBB14_6
+; RV32XTHEADBB-NEXT: # %bb.5:
; RV32XTHEADBB-NEXT: li a4, 0
-; RV32XTHEADBB-NEXT: j .LBB14_8
-; RV32XTHEADBB-NEXT: .LBB14_7:
+; RV32XTHEADBB-NEXT: j .LBB14_7
+; RV32XTHEADBB-NEXT: .LBB14_6:
; RV32XTHEADBB-NEXT: srl a4, a1, a2
-; RV32XTHEADBB-NEXT: .LBB14_8:
+; RV32XTHEADBB-NEXT: .LBB14_7:
; RV32XTHEADBB-NEXT: neg a7, a2
; RV32XTHEADBB-NEXT: andi a6, a7, 63
-; RV32XTHEADBB-NEXT: bltu a6, a5, .LBB14_10
-; RV32XTHEADBB-NEXT: # %bb.9:
+; RV32XTHEADBB-NEXT: bltu a6, a5, .LBB14_9
+; RV32XTHEADBB-NEXT: # %bb.8:
; RV32XTHEADBB-NEXT: li a2, 0
; RV32XTHEADBB-NEXT: sll a0, a0, a6
-; RV32XTHEADBB-NEXT: bnez a6, .LBB14_11
-; RV32XTHEADBB-NEXT: j .LBB14_12
-; RV32XTHEADBB-NEXT: .LBB14_10:
+; RV32XTHEADBB-NEXT: bnez a6, .LBB14_10
+; RV32XTHEADBB-NEXT: j .LBB14_11
+; RV32XTHEADBB-NEXT: .LBB14_9:
; RV32XTHEADBB-NEXT: sll a2, a0, a7
; RV32XTHEADBB-NEXT: neg a5, a6
; RV32XTHEADBB-NEXT: srl a0, a0, a5
; RV32XTHEADBB-NEXT: sll a5, a1, a7
; RV32XTHEADBB-NEXT: or a0, a0, a5
-; RV32XTHEADBB-NEXT: beqz a6, .LBB14_12
-; RV32XTHEADBB-NEXT: .LBB14_11:
+; RV32XTHEADBB-NEXT: beqz a6, .LBB14_11
+; RV32XTHEADBB-NEXT: .LBB14_10:
; RV32XTHEADBB-NEXT: mv a1, a0
-; RV32XTHEADBB-NEXT: .LBB14_12:
+; RV32XTHEADBB-NEXT: .LBB14_11:
; RV32XTHEADBB-NEXT: or a0, a3, a2
; RV32XTHEADBB-NEXT: or a1, a4, a1
; RV32XTHEADBB-NEXT: ret
@@ -2052,59 +2061,60 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign
; RV32I-NEXT: bltu a5, t0, .LBB19_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srl t1, a1, a5
-; RV32I-NEXT: j .LBB19_3
+; RV32I-NEXT: mv a7, a0
+; RV32I-NEXT: bnez a5, .LBB19_3
+; RV32I-NEXT: j .LBB19_4
; RV32I-NEXT: .LBB19_2:
; RV32I-NEXT: srl a7, a0, a4
; RV32I-NEXT: sll t1, a1, a6
; RV32I-NEXT: or t1, a7, t1
-; RV32I-NEXT: .LBB19_3:
; RV32I-NEXT: mv a7, a0
-; RV32I-NEXT: beqz a5, .LBB19_5
-; RV32I-NEXT: # %bb.4:
+; RV32I-NEXT: beqz a5, .LBB19_4
+; RV32I-NEXT: .LBB19_3:
; RV32I-NEXT: mv a7, t1
-; RV32I-NEXT: .LBB19_5:
+; RV32I-NEXT: .LBB19_4:
; RV32I-NEXT: neg t4, a5
-; RV32I-NEXT: bltu a5, t0, .LBB19_9
-; RV32I-NEXT: # %bb.6:
+; RV32I-NEXT: bltu a5, t0, .LBB19_7
+; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: li t1, 0
-; RV32I-NEXT: .LBB19_7:
; RV32I-NEXT: andi t3, t4, 63
-; RV32I-NEXT: bgeu t3, t0, .LBB19_10
-; RV32I-NEXT: # %bb.8:
+; RV32I-NEXT: bgeu t3, t0, .LBB19_8
+; RV32I-NEXT: .LBB19_6:
; RV32I-NEXT: sll t2, a0, t4
; RV32I-NEXT: neg t5, t3
; RV32I-NEXT: srl a0, a0, t5
; RV32I-NEXT: sll t4, a1, t4
; RV32I-NEXT: or a0, a0, t4
-; RV32I-NEXT: bnez t3, .LBB19_11
-; RV32I-NEXT: j .LBB19_12
-; RV32I-NEXT: .LBB19_9:
+; RV32I-NEXT: bnez t3, .LBB19_9
+; RV32I-NEXT: j .LBB19_10
+; RV32I-NEXT: .LBB19_7:
; RV32I-NEXT: srl t1, a1, a4
-; RV32I-NEXT: j .LBB19_7
-; RV32I-NEXT: .LBB19_10:
+; RV32I-NEXT: andi t3, t4, 63
+; RV32I-NEXT: bltu t3, t0, .LBB19_6
+; RV32I-NEXT: .LBB19_8:
; RV32I-NEXT: li t2, 0
; RV32I-NEXT: sll a0, a0, t3
-; RV32I-NEXT: beqz t3, .LBB19_12
-; RV32I-NEXT: .LBB19_11:
+; RV32I-NEXT: beqz t3, .LBB19_10
+; RV32I-NEXT: .LBB19_9:
; RV32I-NEXT: mv a1, a0
-; RV32I-NEXT: .LBB19_12:
-; RV32I-NEXT: bltu a5, t0, .LBB19_14
-; RV32I-NEXT: # %bb.13:
+; RV32I-NEXT: .LBB19_10:
+; RV32I-NEXT: bltu a5, t0, .LBB19_12
+; RV32I-NEXT: # %bb.11:
; RV32I-NEXT: li t0, 0
; RV32I-NEXT: sll a0, a2, a5
-; RV32I-NEXT: j .LBB19_15
-; RV32I-NEXT: .LBB19_14:
+; RV32I-NEXT: j .LBB19_13
+; RV32I-NEXT: .LBB19_12:
; RV32I-NEXT: sll t0, a2, a4
; RV32I-NEXT: srl a0, a2, a6
; RV32I-NEXT: sll a2, a3, a4
; RV32I-NEXT: or a0, a0, a2
-; RV32I-NEXT: .LBB19_15:
+; RV32I-NEXT: .LBB19_13:
; RV32I-NEXT: or a2, a7, t2
; RV32I-NEXT: or a1, t1, a1
-; RV32I-NEXT: beqz a5, .LBB19_17
-; RV32I-NEXT: # %bb.16:
+; RV32I-NEXT: beqz a5, .LBB19_15
+; RV32I-NEXT: # %bb.14:
; RV32I-NEXT: mv a3, a0
-; RV32I-NEXT: .LBB19_17:
+; RV32I-NEXT: .LBB19_15:
; RV32I-NEXT: add a0, a2, t0
; RV32I-NEXT: sltu a2, a0, t0
; RV32I-NEXT: add a1, a1, a3
@@ -2130,59 +2140,60 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign
; RV32ZBB-NEXT: bltu a5, t0, .LBB19_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: srl t1, a1, a5
-; RV32ZBB-NEXT: j .LBB19_3
+; RV32ZBB-NEXT: mv a7, a0
+; RV32ZBB-NEXT: bnez a5, .LBB19_3
+; RV32ZBB-NEXT: j .LBB19_4
; RV32ZBB-NEXT: .LBB19_2:
; RV32ZBB-NEXT: srl a7, a0, a4
; RV32ZBB-NEXT: sll t1, a1, a6
; RV32ZBB-NEXT: or t1, a7, t1
-; RV32ZBB-NEXT: .LBB19_3:
; RV32ZBB-NEXT: mv a7, a0
-; RV32ZBB-NEXT: beqz a5, .LBB19_5
-; RV32ZBB-NEXT: # %bb.4:
+; RV32ZBB-NEXT: beqz a5, .LBB19_4
+; RV32ZBB-NEXT: .LBB19_3:
; RV32ZBB-NEXT: mv a7, t1
-; RV32ZBB-NEXT: .LBB19_5:
+; RV32ZBB-NEXT: .LBB19_4:
; RV32ZBB-NEXT: neg t4, a5
-; RV32ZBB-NEXT: bltu a5, t0, .LBB19_9
-; RV32ZBB-NEXT: # %bb.6:
+; RV32ZBB-NEXT: bltu a5, t0, .LBB19_7
+; RV32ZBB-NEXT: # %bb.5:
; RV32ZBB-NEXT: li t1, 0
-; RV32ZBB-NEXT: .LBB19_7:
; RV32ZBB-NEXT: andi t3, t4, 63
-; RV32ZBB-NEXT: bgeu t3, t0, .LBB19_10
-; RV32ZBB-NEXT: # %bb.8:
+; RV32ZBB-NEXT: bgeu t3, t0, .LBB19_8
+; RV32ZBB-NEXT: .LBB19_6:
; RV32ZBB-NEXT: sll t2, a0, t4
; RV32ZBB-NEXT: neg t5, t3
; RV32ZBB-NEXT: srl a0, a0, t5
; RV32ZBB-NEXT: sll t4, a1, t4
; RV32ZBB-NEXT: or a0, a0, t4
-; RV32ZBB-NEXT: bnez t3, .LBB19_11
-; RV32ZBB-NEXT: j .LBB19_12
-; RV32ZBB-NEXT: .LBB19_9:
+; RV32ZBB-NEXT: bnez t3, .LBB19_9
+; RV32ZBB-NEXT: j .LBB19_10
+; RV32ZBB-NEXT: .LBB19_7:
; RV32ZBB-NEXT: srl t1, a1, a4
-; RV32ZBB-NEXT: j .LBB19_7
-; RV32ZBB-NEXT: .LBB19_10:
+; RV32ZBB-NEXT: andi t3, t4, 63
+; RV32ZBB-NEXT: bltu t3, t0, .LBB19_6
+; RV32ZBB-NEXT: .LBB19_8:
; RV32ZBB-NEXT: li t2, 0
; RV32ZBB-NEXT: sll a0, a0, t3
-; RV32ZBB-NEXT: beqz t3, .LBB19_12
-; RV32ZBB-NEXT: .LBB19_11:
+; RV32ZBB-NEXT: beqz t3, .LBB19_10
+; RV32ZBB-NEXT: .LBB19_9:
; RV32ZBB-NEXT: mv a1, a0
-; RV32ZBB-NEXT: .LBB19_12:
-; RV32ZBB-NEXT: bltu a5, t0, .LBB19_14
-; RV32ZBB-NEXT: # %bb.13:
+; RV32ZBB-NEXT: .LBB19_10:
+; RV32ZBB-NEXT: bltu a5, t0, .LBB19_12
+; RV32ZBB-NEXT: # %bb.11:
; RV32ZBB-NEXT: li t0, 0
; RV32ZBB-NEXT: sll a0, a2, a5
-; RV32ZBB-NEXT: j .LBB19_15
-; RV32ZBB-NEXT: .LBB19_14:
+; RV32ZBB-NEXT: j .LBB19_13
+; RV32ZBB-NEXT: .LBB19_12:
; RV32ZBB-NEXT: sll t0, a2, a4
; RV32ZBB-NEXT: srl a0, a2, a6
; RV32ZBB-NEXT: sll a2, a3, a4
; RV32ZBB-NEXT: or a0, a0, a2
-; RV32ZBB-NEXT: .LBB19_15:
+; RV32ZBB-NEXT: .LBB19_13:
; RV32ZBB-NEXT: or a2, a7, t2
; RV32ZBB-NEXT: or a1, t1, a1
-; RV32ZBB-NEXT: beqz a5, .LBB19_17
-; RV32ZBB-NEXT: # %bb.16:
+; RV32ZBB-NEXT: beqz a5, .LBB19_15
+; RV32ZBB-NEXT: # %bb.14:
; RV32ZBB-NEXT: mv a3, a0
-; RV32ZBB-NEXT: .LBB19_17:
+; RV32ZBB-NEXT: .LBB19_15:
; RV32ZBB-NEXT: add a0, a2, t0
; RV32ZBB-NEXT: sltu a2, a0, t0
; RV32ZBB-NEXT: add a1, a1, a3
@@ -2204,59 +2215,60 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign
; RV32XTHEADBB-NEXT: bltu a5, t0, .LBB19_2
; RV32XTHEADBB-NEXT: # %bb.1:
; RV32XTHEADBB-NEXT: srl t1, a1, a5
-; RV32XTHEADBB-NEXT: j .LBB19_3
+; RV32XTHEADBB-NEXT: mv a7, a0
+; RV32XTHEADBB-NEXT: bnez a5, .LBB19_3
+; RV32XTHEADBB-NEXT: j .LBB19_4
; RV32XTHEADBB-NEXT: .LBB19_2:
; RV32XTHEADBB-NEXT: srl a7, a0, a4
; RV32XTHEADBB-NEXT: sll t1, a1, a6
; RV32XTHEADBB-NEXT: or t1, a7, t1
-; RV32XTHEADBB-NEXT: .LBB19_3:
; RV32XTHEADBB-NEXT: mv a7, a0
-; RV32XTHEADBB-NEXT: beqz a5, .LBB19_5
-; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: beqz a5, .LBB19_4
+; RV32XTHEADBB-NEXT: .LBB19_3:
; RV32XTHEADBB-NEXT: mv a7, t1
-; RV32XTHEADBB-NEXT: .LBB19_5:
+; RV32XTHEADBB-NEXT: .LBB19_4:
; RV32XTHEADBB-NEXT: neg t4, a5
-; RV32XTHEADBB-NEXT: bltu a5, t0, .LBB19_9
-; RV32XTHEADBB-NEXT: # %bb.6:
+; RV32XTHEADBB-NEXT: bltu a5, t0, .LBB19_7
+; RV32XTHEADBB-NEXT: # %bb.5:
; RV32XTHEADBB-NEXT: li t1, 0
-; RV32XTHEADBB-NEXT: .LBB19_7:
; RV32XTHEADBB-NEXT: andi t3, t4, 63
-; RV32XTHEADBB-NEXT: bgeu t3, t0, .LBB19_10
-; RV32XTHEADBB-NEXT: # %bb.8:
+; RV32XTHEADBB-NEXT: bgeu t3, t0, .LBB19_8
+; RV32XTHEADBB-NEXT: .LBB19_6:
; RV32XTHEADBB-NEXT: sll t2, a0, t4
; RV32XTHEADBB-NEXT: neg t5, t3
; RV32XTHEADBB-NEXT: srl a0, a0, t5
; RV32XTHEADBB-NEXT: sll t4, a1, t4
; RV32XTHEADBB-NEXT: or a0, a0, t4
-; RV32XTHEADBB-NEXT: bnez t3, .LBB19_11
-; RV32XTHEADBB-NEXT: j .LBB19_12
-; RV32XTHEADBB-NEXT: .LBB19_9:
+; RV32XTHEADBB-NEXT: bnez t3, .LBB19_9
+; RV32XTHEADBB-NEXT: j .LBB19_10
+; RV32XTHEADBB-NEXT: .LBB19_7:
; RV32XTHEADBB-NEXT: srl t1, a1, a4
-; RV32XTHEADBB-NEXT: j .LBB19_7
-; RV32XTHEADBB-NEXT: .LBB19_10:
+; RV32XTHEADBB-NEXT: andi t3, t4, 63
+; RV32XTHEADBB-NEXT: bltu t3, t0, .LBB19_6
+; RV32XTHEADBB-NEXT: .LBB19_8:
; RV32XTHEADBB-NEXT: li t2, 0
; RV32XTHEADBB-NEXT: sll a0, a0, t3
-; RV32XTHEADBB-NEXT: beqz t3, .LBB19_12
-; RV32XTHEADBB-NEXT: .LBB19_11:
+; RV32XTHEADBB-NEXT: beqz t3, .LBB19_10
+; RV32XTHEADBB-NEXT: .LBB19_9:
; RV32XTHEADBB-NEXT: mv a1, a0
-; RV32XTHEADBB-NEXT: .LBB19_12:
-; RV32XTHEADBB-NEXT: bltu a5, t0, .LBB19_14
-; RV32XTHEADBB-NEXT: # %bb.13:
+; RV32XTHEADBB-NEXT: .LBB19_10:
+; RV32XTHEADBB-NEXT: bltu a5, t0, .LBB19_12
+; RV32XTHEADBB-NEXT: # %bb.11:
; RV32XTHEADBB-NEXT: li t0, 0
; RV32XTHEADBB-NEXT: sll a0, a2, a5
-; RV32XTHEADBB-NEXT: j .LBB19_15
-; RV32XTHEADBB-NEXT: .LBB19_14:
+; RV32XTHEADBB-NEXT: j .LBB19_13
+; RV32XTHEADBB-NEXT: .LBB19_12:
; RV32XTHEADBB-NEXT: sll t0, a2, a4
; RV32XTHEADBB-NEXT: srl a0, a2, a6
; RV32XTHEADBB-NEXT: sll a2, a3, a4
; RV32XTHEADBB-NEXT: or a0, a0, a2
-; RV32XTHEADBB-NEXT: .LBB19_15:
+; RV32XTHEADBB-NEXT: .LBB19_13:
; RV32XTHEADBB-NEXT: or a2, a7, t2
; RV32XTHEADBB-NEXT: or a1, t1, a1
-; RV32XTHEADBB-NEXT: beqz a5, .LBB19_17
-; RV32XTHEADBB-NEXT: # %bb.16:
+; RV32XTHEADBB-NEXT: beqz a5, .LBB19_15
+; RV32XTHEADBB-NEXT: # %bb.14:
; RV32XTHEADBB-NEXT: mv a3, a0
-; RV32XTHEADBB-NEXT: .LBB19_17:
+; RV32XTHEADBB-NEXT: .LBB19_15:
; RV32XTHEADBB-NEXT: add a0, a2, t0
; RV32XTHEADBB-NEXT: sltu a2, a0, t0
; RV32XTHEADBB-NEXT: add a1, a1, a3
@@ -2403,41 +2415,42 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV32I-NEXT: srl t3, a2, t3
; RV32I-NEXT: sll a4, a3, a4
; RV32I-NEXT: or t3, t3, a4
-; RV32I-NEXT: j .LBB21_14
+; RV32I-NEXT: mv a4, a3
+; RV32I-NEXT: bnez t1, .LBB21_14
+; RV32I-NEXT: j .LBB21_15
; RV32I-NEXT: .LBB21_12:
; RV32I-NEXT: srl t5, a1, a7
; RV32I-NEXT: bltu t1, a5, .LBB21_11
; RV32I-NEXT: .LBB21_13:
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: sll t3, a2, t1
-; RV32I-NEXT: .LBB21_14:
; RV32I-NEXT: mv a4, a3
-; RV32I-NEXT: beqz t1, .LBB21_16
-; RV32I-NEXT: # %bb.15:
+; RV32I-NEXT: beqz t1, .LBB21_15
+; RV32I-NEXT: .LBB21_14:
; RV32I-NEXT: mv a4, t3
-; RV32I-NEXT: .LBB21_16:
-; RV32I-NEXT: bltu t2, a5, .LBB21_18
-; RV32I-NEXT: # %bb.17:
+; RV32I-NEXT: .LBB21_15:
+; RV32I-NEXT: bltu t2, a5, .LBB21_17
+; RV32I-NEXT: # %bb.16:
; RV32I-NEXT: srl t1, a3, t2
-; RV32I-NEXT: bnez t2, .LBB21_19
-; RV32I-NEXT: j .LBB21_20
-; RV32I-NEXT: .LBB21_18:
+; RV32I-NEXT: bnez t2, .LBB21_18
+; RV32I-NEXT: j .LBB21_19
+; RV32I-NEXT: .LBB21_17:
; RV32I-NEXT: srl t1, a2, a7
; RV32I-NEXT: sll t3, a3, t4
; RV32I-NEXT: or t1, t1, t3
-; RV32I-NEXT: beqz t2, .LBB21_20
-; RV32I-NEXT: .LBB21_19:
+; RV32I-NEXT: beqz t2, .LBB21_19
+; RV32I-NEXT: .LBB21_18:
; RV32I-NEXT: mv a2, t1
-; RV32I-NEXT: .LBB21_20:
+; RV32I-NEXT: .LBB21_19:
; RV32I-NEXT: or a0, a6, a0
; RV32I-NEXT: or a6, t0, t5
-; RV32I-NEXT: bltu t2, a5, .LBB21_22
-; RV32I-NEXT: # %bb.21:
+; RV32I-NEXT: bltu t2, a5, .LBB21_21
+; RV32I-NEXT: # %bb.20:
; RV32I-NEXT: li a3, 0
-; RV32I-NEXT: j .LBB21_23
-; RV32I-NEXT: .LBB21_22:
+; RV32I-NEXT: j .LBB21_22
+; RV32I-NEXT: .LBB21_21:
; RV32I-NEXT: srl a3, a3, a7
-; RV32I-NEXT: .LBB21_23:
+; RV32I-NEXT: .LBB21_22:
; RV32I-NEXT: or a1, a1, a2
; RV32I-NEXT: or a3, a4, a3
; RV32I-NEXT: add a0, a0, a1
@@ -2505,41 +2518,42 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV32ZBB-NEXT: srl t3, a2, t3
; RV32ZBB-NEXT: sll a4, a3, a4
; RV32ZBB-NEXT: or t3, t3, a4
-; RV32ZBB-NEXT: j .LBB21_14
+; RV32ZBB-NEXT: mv a4, a3
+; RV32ZBB-NEXT: bnez t1, .LBB21_14
+; RV32ZBB-NEXT: j .LBB21_15
; RV32ZBB-NEXT: .LBB21_12:
; RV32ZBB-NEXT: srl t5, a1, a7
; RV32ZBB-NEXT: bltu t1, a5, .LBB21_11
; RV32ZBB-NEXT: .LBB21_13:
; RV32ZBB-NEXT: li a1, 0
; RV32ZBB-NEXT: sll t3, a2, t1
-; RV32ZBB-NEXT: .LBB21_14:
; RV32ZBB-NEXT: mv a4, a3
-; RV32ZBB-NEXT: beqz t1, .LBB21_16
-; RV32ZBB-NEXT: # %bb.15:
+; RV32ZBB-NEXT: beqz t1, .LBB21_15
+; RV32ZBB-NEXT: .LBB21_14:
; RV32ZBB-NEXT: mv a4, t3
-; RV32ZBB-NEXT: .LBB21_16:
-; RV32ZBB-NEXT: bltu t2, a5, .LBB21_18
-; RV32ZBB-NEXT: # %bb.17:
+; RV32ZBB-NEXT: .LBB21_15:
+; RV32ZBB-NEXT: bltu t2, a5, .LBB21_17
+; RV32ZBB-NEXT: # %bb.16:
; RV32ZBB-NEXT: srl t1, a3, t2
-; RV32ZBB-NEXT: bnez t2, .LBB21_19
-; RV32ZBB-NEXT: j .LBB21_20
-; RV32ZBB-NEXT: .LBB21_18:
+; RV32ZBB-NEXT: bnez t2, .LBB21_18
+; RV32ZBB-NEXT: j .LBB21_19
+; RV32ZBB-NEXT: .LBB21_17:
; RV32ZBB-NEXT: srl t1, a2, a7
; RV32ZBB-NEXT: sll t3, a3, t4
; RV32ZBB-NEXT: or t1, t1, t3
-; RV32ZBB-NEXT: beqz t2, .LBB21_20
-; RV32ZBB-NEXT: .LBB21_19:
+; RV32ZBB-NEXT: beqz t2, .LBB21_19
+; RV32ZBB-NEXT: .LBB21_18:
; RV32ZBB-NEXT: mv a2, t1
-; RV32ZBB-NEXT: .LBB21_20:
+; RV32ZBB-NEXT: .LBB21_19:
; RV32ZBB-NEXT: or a0, a6, a0
; RV32ZBB-NEXT: or a6, t0, t5
-; RV32ZBB-NEXT: bltu t2, a5, .LBB21_22
-; RV32ZBB-NEXT: # %bb.21:
+; RV32ZBB-NEXT: bltu t2, a5, .LBB21_21
+; RV32ZBB-NEXT: # %bb.20:
; RV32ZBB-NEXT: li a3, 0
-; RV32ZBB-NEXT: j .LBB21_23
-; RV32ZBB-NEXT: .LBB21_22:
+; RV32ZBB-NEXT: j .LBB21_22
+; RV32ZBB-NEXT: .LBB21_21:
; RV32ZBB-NEXT: srl a3, a3, a7
-; RV32ZBB-NEXT: .LBB21_23:
+; RV32ZBB-NEXT: .LBB21_22:
; RV32ZBB-NEXT: or a1, a1, a2
; RV32ZBB-NEXT: or a3, a4, a3
; RV32ZBB-NEXT: add a0, a0, a1
@@ -2601,41 +2615,42 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV32XTHEADBB-NEXT: srl t3, a2, t3
; RV32XTHEADBB-NEXT: sll a4, a3, a4
; RV32XTHEADBB-NEXT: or t3, t3, a4
-; RV32XTHEADBB-NEXT: j .LBB21_14
+; RV32XTHEADBB-NEXT: mv a4, a3
+; RV32XTHEADBB-NEXT: bnez t1, .LBB21_14
+; RV32XTHEADBB-NEXT: j .LBB21_15
; RV32XTHEADBB-NEXT: .LBB21_12:
; RV32XTHEADBB-NEXT: srl t5, a1, a7
; RV32XTHEADBB-NEXT: bltu t1, a5, .LBB21_11
; RV32XTHEADBB-NEXT: .LBB21_13:
; RV32XTHEADBB-NEXT: li a1, 0
; RV32XTHEADBB-NEXT: sll t3, a2, t1
-; RV32XTHEADBB-NEXT: .LBB21_14:
; RV32XTHEADBB-NEXT: mv a4, a3
-; RV32XTHEADBB-NEXT: beqz t1, .LBB21_16
-; RV32XTHEADBB-NEXT: # %bb.15:
+; RV32XTHEADBB-NEXT: beqz t1, .LBB21_15
+; RV32XTHEADBB-NEXT: .LBB21_14:
; RV32XTHEADBB-NEXT: mv a4, t3
-; RV32XTHEADBB-NEXT: .LBB21_16:
-; RV32XTHEADBB-NEXT: bltu t2, a5, .LBB21_18
-; RV32XTHEADBB-NEXT: # %bb.17:
+; RV32XTHEADBB-NEXT: .LBB21_15:
+; RV32XTHEADBB-NEXT: bltu t2, a5, .LBB21_17
+; RV32XTHEADBB-NEXT: # %bb.16:
; RV32XTHEADBB-NEXT: srl t1, a3, t2
-; RV32XTHEADBB-NEXT: bnez t2, .LBB21_19
-; RV32XTHEADBB-NEXT: j .LBB21_20
-; RV32XTHEADBB-NEXT: .LBB21_18:
+; RV32XTHEADBB-NEXT: bnez t2, .LBB21_18
+; RV32XTHEADBB-NEXT: j .LBB21_19
+; RV32XTHEADBB-NEXT: .LBB21_17:
; RV32XTHEADBB-NEXT: srl t1, a2, a7
; RV32XTHEADBB-NEXT: sll t3, a3, t4
; RV32XTHEADBB-NEXT: or t1, t1, t3
-; RV32XTHEADBB-NEXT: beqz t2, .LBB21_20
-; RV32XTHEADBB-NEXT: .LBB21_19:
+; RV32XTHEADBB-NEXT: beqz t2, .LBB21_19
+; RV32XTHEADBB-NEXT: .LBB21_18:
; RV32XTHEADBB-NEXT: mv a2, t1
-; RV32XTHEADBB-NEXT: .LBB21_20:
+; RV32XTHEADBB-NEXT: .LBB21_19:
; RV32XTHEADBB-NEXT: or a0, a6, a0
; RV32XTHEADBB-NEXT: or a6, t0, t5
-; RV32XTHEADBB-NEXT: bltu t2, a5, .LBB21_22
-; RV32XTHEADBB-NEXT: # %bb.21:
+; RV32XTHEADBB-NEXT: bltu t2, a5, .LBB21_21
+; RV32XTHEADBB-NEXT: # %bb.20:
; RV32XTHEADBB-NEXT: li a3, 0
-; RV32XTHEADBB-NEXT: j .LBB21_23
-; RV32XTHEADBB-NEXT: .LBB21_22:
+; RV32XTHEADBB-NEXT: j .LBB21_22
+; RV32XTHEADBB-NEXT: .LBB21_21:
; RV32XTHEADBB-NEXT: srl a3, a3, a7
-; RV32XTHEADBB-NEXT: .LBB21_23:
+; RV32XTHEADBB-NEXT: .LBB21_22:
; RV32XTHEADBB-NEXT: or a1, a1, a2
; RV32XTHEADBB-NEXT: or a3, a4, a3
; RV32XTHEADBB-NEXT: add a0, a0, a1
@@ -2747,79 +2762,81 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV32I-NEXT: bltu t0, a6, .LBB23_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srl a7, a1, t0
-; RV32I-NEXT: j .LBB23_3
+; RV32I-NEXT: mv a5, a0
+; RV32I-NEXT: bnez t0, .LBB23_3
+; RV32I-NEXT: j .LBB23_4
; RV32I-NEXT: .LBB23_2:
; RV32I-NEXT: srl a5, a0, a4
; RV32I-NEXT: sll a7, a1, t4
; RV32I-NEXT: or a7, a5, a7
-; RV32I-NEXT: .LBB23_3:
; RV32I-NEXT: mv a5, a0
-; RV32I-NEXT: beqz t0, .LBB23_5
-; RV32I-NEXT: # %bb.4:
+; RV32I-NEXT: beqz t0, .LBB23_4
+; RV32I-NEXT: .LBB23_3:
; RV32I-NEXT: mv a5, a7
-; RV32I-NEXT: .LBB23_5:
+; RV32I-NEXT: .LBB23_4:
; RV32I-NEXT: neg t2, t0
-; RV32I-NEXT: bltu t0, a6, .LBB23_7
-; RV32I-NEXT: # %bb.6:
+; RV32I-NEXT: bltu t0, a6, .LBB23_6
+; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: li a7, 0
-; RV32I-NEXT: j .LBB23_8
-; RV32I-NEXT: .LBB23_7:
+; RV32I-NEXT: j .LBB23_7
+; RV32I-NEXT: .LBB23_6:
; RV32I-NEXT: srl a7, a1, a4
-; RV32I-NEXT: .LBB23_8:
+; RV32I-NEXT: .LBB23_7:
; RV32I-NEXT: andi t1, t2, 63
; RV32I-NEXT: neg t5, t1
-; RV32I-NEXT: bltu t1, a6, .LBB23_10
-; RV32I-NEXT: # %bb.9:
+; RV32I-NEXT: bltu t1, a6, .LBB23_9
+; RV32I-NEXT: # %bb.8:
; RV32I-NEXT: li t3, 0
; RV32I-NEXT: sll a0, a0, t1
-; RV32I-NEXT: bnez t1, .LBB23_11
-; RV32I-NEXT: j .LBB23_12
-; RV32I-NEXT: .LBB23_10:
+; RV32I-NEXT: bnez t1, .LBB23_10
+; RV32I-NEXT: j .LBB23_11
+; RV32I-NEXT: .LBB23_9:
; RV32I-NEXT: sll t3, a0, t2
; RV32I-NEXT: srl a0, a0, t5
; RV32I-NEXT: sll t6, a1, t2
; RV32I-NEXT: or a0, a0, t6
-; RV32I-NEXT: beqz t1, .LBB23_12
-; RV32I-NEXT: .LBB23_11:
+; RV32I-NEXT: beqz t1, .LBB23_11
+; RV32I-NEXT: .LBB23_10:
; RV32I-NEXT: mv a1, a0
-; RV32I-NEXT: .LBB23_12:
-; RV32I-NEXT: bltu t0, a6, .LBB23_14
-; RV32I-NEXT: # %bb.13:
+; RV32I-NEXT: .LBB23_11:
+; RV32I-NEXT: bltu t0, a6, .LBB23_13
+; RV32I-NEXT: # %bb.12:
; RV32I-NEXT: srl t4, a3, t0
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: bnez t0, .LBB23_14
; RV32I-NEXT: j .LBB23_15
-; RV32I-NEXT: .LBB23_14:
+; RV32I-NEXT: .LBB23_13:
; RV32I-NEXT: srl a0, a2, a4
; RV32I-NEXT: sll t4, a3, t4
; RV32I-NEXT: or t4, a0, t4
-; RV32I-NEXT: .LBB23_15:
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: beqz t0, .LBB23_17
-; RV32I-NEXT: # %bb.16:
+; RV32I-NEXT: beqz t0, .LBB23_15
+; RV32I-NEXT: .LBB23_14:
; RV32I-NEXT: mv a0, t4
-; RV32I-NEXT: .LBB23_17:
-; RV32I-NEXT: bltu t0, a6, .LBB23_20
-; RV32I-NEXT: # %bb.18:
+; RV32I-NEXT: .LBB23_15:
+; RV32I-NEXT: bltu t0, a6, .LBB23_18
+; RV32I-NEXT: # %bb.16:
; RV32I-NEXT: li a4, 0
-; RV32I-NEXT: bgeu t1, a6, .LBB23_21
-; RV32I-NEXT: .LBB23_19:
+; RV32I-NEXT: bgeu t1, a6, .LBB23_19
+; RV32I-NEXT: .LBB23_17:
; RV32I-NEXT: sll a6, a2, t2
; RV32I-NEXT: srl a2, a2, t5
; RV32I-NEXT: sll t0, a3, t2
; RV32I-NEXT: or a2, a2, t0
-; RV32I-NEXT: j .LBB23_22
-; RV32I-NEXT: .LBB23_20:
+; RV32I-NEXT: j .LBB23_20
+; RV32I-NEXT: .LBB23_18:
; RV32I-NEXT: srl a4, a3, a4
-; RV32I-NEXT: bltu t1, a6, .LBB23_19
-; RV32I-NEXT: .LBB23_21:
+; RV32I-NEXT: bltu t1, a6, .LBB23_17
+; RV32I-NEXT: .LBB23_19:
; RV32I-NEXT: li a6, 0
; RV32I-NEXT: sll a2, a2, t1
-; RV32I-NEXT: .LBB23_22:
+; RV32I-NEXT: .LBB23_20:
; RV32I-NEXT: or a5, a5, t3
; RV32I-NEXT: or a1, a7, a1
-; RV32I-NEXT: beqz t1, .LBB23_24
-; RV32I-NEXT: # %bb.23:
+; RV32I-NEXT: beqz t1, .LBB23_22
+; RV32I-NEXT: # %bb.21:
; RV32I-NEXT: mv a3, a2
-; RV32I-NEXT: .LBB23_24:
+; RV32I-NEXT: .LBB23_22:
; RV32I-NEXT: or a2, a0, a6
; RV32I-NEXT: or a3, a4, a3
; RV32I-NEXT: add a0, a5, a2
@@ -2849,79 +2866,81 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV32ZBB-NEXT: bltu t0, a6, .LBB23_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: srl a7, a1, t0
-; RV32ZBB-NEXT: j .LBB23_3
+; RV32ZBB-NEXT: mv a5, a0
+; RV32ZBB-NEXT: bnez t0, .LBB23_3
+; RV32ZBB-NEXT: j .LBB23_4
; RV32ZBB-NEXT: .LBB23_2:
; RV32ZBB-NEXT: srl a5, a0, a4
; RV32ZBB-NEXT: sll a7, a1, t4
; RV32ZBB-NEXT: or a7, a5, a7
-; RV32ZBB-NEXT: .LBB23_3:
; RV32ZBB-NEXT: mv a5, a0
-; RV32ZBB-NEXT: beqz t0, .LBB23_5
-; RV32ZBB-NEXT: # %bb.4:
+; RV32ZBB-NEXT: beqz t0, .LBB23_4
+; RV32ZBB-NEXT: .LBB23_3:
; RV32ZBB-NEXT: mv a5, a7
-; RV32ZBB-NEXT: .LBB23_5:
+; RV32ZBB-NEXT: .LBB23_4:
; RV32ZBB-NEXT: neg t2, t0
-; RV32ZBB-NEXT: bltu t0, a6, .LBB23_7
-; RV32ZBB-NEXT: # %bb.6:
+; RV32ZBB-NEXT: bltu t0, a6, .LBB23_6
+; RV32ZBB-NEXT: # %bb.5:
; RV32ZBB-NEXT: li a7, 0
-; RV32ZBB-NEXT: j .LBB23_8
-; RV32ZBB-NEXT: .LBB23_7:
+; RV32ZBB-NEXT: j .LBB23_7
+; RV32ZBB-NEXT: .LBB23_6:
; RV32ZBB-NEXT: srl a7, a1, a4
-; RV32ZBB-NEXT: .LBB23_8:
+; RV32ZBB-NEXT: .LBB23_7:
; RV32ZBB-NEXT: andi t1, t2, 63
; RV32ZBB-NEXT: neg t5, t1
-; RV32ZBB-NEXT: bltu t1, a6, .LBB23_10
-; RV32ZBB-NEXT: # %bb.9:
+; RV32ZBB-NEXT: bltu t1, a6, .LBB23_9
+; RV32ZBB-NEXT: # %bb.8:
; RV32ZBB-NEXT: li t3, 0
; RV32ZBB-NEXT: sll a0, a0, t1
-; RV32ZBB-NEXT: bnez t1, .LBB23_11
-; RV32ZBB-NEXT: j .LBB23_12
-; RV32ZBB-NEXT: .LBB23_10:
+; RV32ZBB-NEXT: bnez t1, .LBB23_10
+; RV32ZBB-NEXT: j .LBB23_11
+; RV32ZBB-NEXT: .LBB23_9:
; RV32ZBB-NEXT: sll t3, a0, t2
; RV32ZBB-NEXT: srl a0, a0, t5
; RV32ZBB-NEXT: sll t6, a1, t2
; RV32ZBB-NEXT: or a0, a0, t6
-; RV32ZBB-NEXT: beqz t1, .LBB23_12
-; RV32ZBB-NEXT: .LBB23_11:
+; RV32ZBB-NEXT: beqz t1, .LBB23_11
+; RV32ZBB-NEXT: .LBB23_10:
; RV32ZBB-NEXT: mv a1, a0
-; RV32ZBB-NEXT: .LBB23_12:
-; RV32ZBB-NEXT: bltu t0, a6, .LBB23_14
-; RV32ZBB-NEXT: # %bb.13:
+; RV32ZBB-NEXT: .LBB23_11:
+; RV32ZBB-NEXT: bltu t0, a6, .LBB23_13
+; RV32ZBB-NEXT: # %bb.12:
; RV32ZBB-NEXT: srl t4, a3, t0
+; RV32ZBB-NEXT: mv a0, a2
+; RV32ZBB-NEXT: bnez t0, .LBB23_14
; RV32ZBB-NEXT: j .LBB23_15
-; RV32ZBB-NEXT: .LBB23_14:
+; RV32ZBB-NEXT: .LBB23_13:
; RV32ZBB-NEXT: srl a0, a2, a4
; RV32ZBB-NEXT: sll t4, a3, t4
; RV32ZBB-NEXT: or t4, a0, t4
-; RV32ZBB-NEXT: .LBB23_15:
; RV32ZBB-NEXT: mv a0, a2
-; RV32ZBB-NEXT: beqz t0, .LBB23_17
-; RV32ZBB-NEXT: # %bb.16:
+; RV32ZBB-NEXT: beqz t0, .LBB23_15
+; RV32ZBB-NEXT: .LBB23_14:
; RV32ZBB-NEXT: mv a0, t4
-; RV32ZBB-NEXT: .LBB23_17:
-; RV32ZBB-NEXT: bltu t0, a6, .LBB23_20
-; RV32ZBB-NEXT: # %bb.18:
+; RV32ZBB-NEXT: .LBB23_15:
+; RV32ZBB-NEXT: bltu t0, a6, .LBB23_18
+; RV32ZBB-NEXT: # %bb.16:
; RV32ZBB-NEXT: li a4, 0
-; RV32ZBB-NEXT: bgeu t1, a6, .LBB23_21
-; RV32ZBB-NEXT: .LBB23_19:
+; RV32ZBB-NEXT: bgeu t1, a6, .LBB23_19
+; RV32ZBB-NEXT: .LBB23_17:
; RV32ZBB-NEXT: sll a6, a2, t2
; RV32ZBB-NEXT: srl a2, a2, t5
; RV32ZBB-NEXT: sll t0, a3, t2
; RV32ZBB-NEXT: or a2, a2, t0
-; RV32ZBB-NEXT: j .LBB23_22
-; RV32ZBB-NEXT: .LBB23_20:
+; RV32ZBB-NEXT: j .LBB23_20
+; RV32ZBB-NEXT: .LBB23_18:
; RV32ZBB-NEXT: srl a4, a3, a4
-; RV32ZBB-NEXT: bltu t1, a6, .LBB23_19
-; RV32ZBB-NEXT: .LBB23_21:
+; RV32ZBB-NEXT: bltu t1, a6, .LBB23_17
+; RV32ZBB-NEXT: .LBB23_19:
; RV32ZBB-NEXT: li a6, 0
; RV32ZBB-NEXT: sll a2, a2, t1
-; RV32ZBB-NEXT: .LBB23_22:
+; RV32ZBB-NEXT: .LBB23_20:
; RV32ZBB-NEXT: or a5, a5, t3
; RV32ZBB-NEXT: or a1, a7, a1
-; RV32ZBB-NEXT: beqz t1, .LBB23_24
-; RV32ZBB-NEXT: # %bb.23:
+; RV32ZBB-NEXT: beqz t1, .LBB23_22
+; RV32ZBB-NEXT: # %bb.21:
; RV32ZBB-NEXT: mv a3, a2
-; RV32ZBB-NEXT: .LBB23_24:
+; RV32ZBB-NEXT: .LBB23_22:
; RV32ZBB-NEXT: or a2, a0, a6
; RV32ZBB-NEXT: or a3, a4, a3
; RV32ZBB-NEXT: add a0, a5, a2
@@ -2945,79 +2964,81 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV32XTHEADBB-NEXT: bltu t0, a6, .LBB23_2
; RV32XTHEADBB-NEXT: # %bb.1:
; RV32XTHEADBB-NEXT: srl a7, a1, t0
-; RV32XTHEADBB-NEXT: j .LBB23_3
+; RV32XTHEADBB-NEXT: mv a5, a0
+; RV32XTHEADBB-NEXT: bnez t0, .LBB23_3
+; RV32XTHEADBB-NEXT: j .LBB23_4
; RV32XTHEADBB-NEXT: .LBB23_2:
; RV32XTHEADBB-NEXT: srl a5, a0, a4
; RV32XTHEADBB-NEXT: sll a7, a1, t4
; RV32XTHEADBB-NEXT: or a7, a5, a7
-; RV32XTHEADBB-NEXT: .LBB23_3:
; RV32XTHEADBB-NEXT: mv a5, a0
-; RV32XTHEADBB-NEXT: beqz t0, .LBB23_5
-; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: beqz t0, .LBB23_4
+; RV32XTHEADBB-NEXT: .LBB23_3:
; RV32XTHEADBB-NEXT: mv a5, a7
-; RV32XTHEADBB-NEXT: .LBB23_5:
+; RV32XTHEADBB-NEXT: .LBB23_4:
; RV32XTHEADBB-NEXT: neg t2, t0
-; RV32XTHEADBB-NEXT: bltu t0, a6, .LBB23_7
-; RV32XTHEADBB-NEXT: # %bb.6:
+; RV32XTHEADBB-NEXT: bltu t0, a6, .LBB23_6
+; RV32XTHEADBB-NEXT: # %bb.5:
; RV32XTHEADBB-NEXT: li a7, 0
-; RV32XTHEADBB-NEXT: j .LBB23_8
-; RV32XTHEADBB-NEXT: .LBB23_7:
+; RV32XTHEADBB-NEXT: j .LBB23_7
+; RV32XTHEADBB-NEXT: .LBB23_6:
; RV32XTHEADBB-NEXT: srl a7, a1, a4
-; RV32XTHEADBB-NEXT: .LBB23_8:
+; RV32XTHEADBB-NEXT: .LBB23_7:
; RV32XTHEADBB-NEXT: andi t1, t2, 63
; RV32XTHEADBB-NEXT: neg t5, t1
-; RV32XTHEADBB-NEXT: bltu t1, a6, .LBB23_10
-; RV32XTHEADBB-NEXT: # %bb.9:
+; RV32XTHEADBB-NEXT: bltu t1, a6, .LBB23_9
+; RV32XTHEADBB-NEXT: # %bb.8:
; RV32XTHEADBB-NEXT: li t3, 0
; RV32XTHEADBB-NEXT: sll a0, a0, t1
-; RV32XTHEADBB-NEXT: bnez t1, .LBB23_11
-; RV32XTHEADBB-NEXT: j .LBB23_12
-; RV32XTHEADBB-NEXT: .LBB23_10:
+; RV32XTHEADBB-NEXT: bnez t1, .LBB23_10
+; RV32XTHEADBB-NEXT: j .LBB23_11
+; RV32XTHEADBB-NEXT: .LBB23_9:
; RV32XTHEADBB-NEXT: sll t3, a0, t2
; RV32XTHEADBB-NEXT: srl a0, a0, t5
; RV32XTHEADBB-NEXT: sll t6, a1, t2
; RV32XTHEADBB-NEXT: or a0, a0, t6
-; RV32XTHEADBB-NEXT: beqz t1, .LBB23_12
-; RV32XTHEADBB-NEXT: .LBB23_11:
+; RV32XTHEADBB-NEXT: beqz t1, .LBB23_11
+; RV32XTHEADBB-NEXT: .LBB23_10:
; RV32XTHEADBB-NEXT: mv a1, a0
-; RV32XTHEADBB-NEXT: .LBB23_12:
-; RV32XTHEADBB-NEXT: bltu t0, a6, .LBB23_14
-; RV32XTHEADBB-NEXT: # %bb.13:
+; RV32XTHEADBB-NEXT: .LBB23_11:
+; RV32XTHEADBB-NEXT: bltu t0, a6, .LBB23_13
+; RV32XTHEADBB-NEXT: # %bb.12:
; RV32XTHEADBB-NEXT: srl t4, a3, t0
+; RV32XTHEADBB-NEXT: mv a0, a2
+; RV32XTHEADBB-NEXT: bnez t0, .LBB23_14
; RV32XTHEADBB-NEXT: j .LBB23_15
-; RV32XTHEADBB-NEXT: .LBB23_14:
+; RV32XTHEADBB-NEXT: .LBB23_13:
; RV32XTHEADBB-NEXT: srl a0, a2, a4
; RV32XTHEADBB-NEXT: sll t4, a3, t4
; RV32XTHEADBB-NEXT: or t4, a0, t4
-; RV32XTHEADBB-NEXT: .LBB23_15:
; RV32XTHEADBB-NEXT: mv a0, a2
-; RV32XTHEADBB-NEXT: beqz t0, .LBB23_17
-; RV32XTHEADBB-NEXT: # %bb.16:
+; RV32XTHEADBB-NEXT: beqz t0, .LBB23_15
+; RV32XTHEADBB-NEXT: .LBB23_14:
; RV32XTHEADBB-NEXT: mv a0, t4
-; RV32XTHEADBB-NEXT: .LBB23_17:
-; RV32XTHEADBB-NEXT: bltu t0, a6, .LBB23_20
-; RV32XTHEADBB-NEXT: # %bb.18:
+; RV32XTHEADBB-NEXT: .LBB23_15:
+; RV32XTHEADBB-NEXT: bltu t0, a6, .LBB23_18
+; RV32XTHEADBB-NEXT: # %bb.16:
; RV32XTHEADBB-NEXT: li a4, 0
-; RV32XTHEADBB-NEXT: bgeu t1, a6, .LBB23_21
-; RV32XTHEADBB-NEXT: .LBB23_19:
+; RV32XTHEADBB-NEXT: bgeu t1, a6, .LBB23_19
+; RV32XTHEADBB-NEXT: .LBB23_17:
; RV32XTHEADBB-NEXT: sll a6, a2, t2
; RV32XTHEADBB-NEXT: srl a2, a2, t5
; RV32XTHEADBB-NEXT: sll t0, a3, t2
; RV32XTHEADBB-NEXT: or a2, a2, t0
-; RV32XTHEADBB-NEXT: j .LBB23_22
-; RV32XTHEADBB-NEXT: .LBB23_20:
+; RV32XTHEADBB-NEXT: j .LBB23_20
+; RV32XTHEADBB-NEXT: .LBB23_18:
; RV32XTHEADBB-NEXT: srl a4, a3, a4
-; RV32XTHEADBB-NEXT: bltu t1, a6, .LBB23_19
-; RV32XTHEADBB-NEXT: .LBB23_21:
+; RV32XTHEADBB-NEXT: bltu t1, a6, .LBB23_17
+; RV32XTHEADBB-NEXT: .LBB23_19:
; RV32XTHEADBB-NEXT: li a6, 0
; RV32XTHEADBB-NEXT: sll a2, a2, t1
-; RV32XTHEADBB-NEXT: .LBB23_22:
+; RV32XTHEADBB-NEXT: .LBB23_20:
; RV32XTHEADBB-NEXT: or a5, a5, t3
; RV32XTHEADBB-NEXT: or a1, a7, a1
-; RV32XTHEADBB-NEXT: beqz t1, .LBB23_24
-; RV32XTHEADBB-NEXT: # %bb.23:
+; RV32XTHEADBB-NEXT: beqz t1, .LBB23_22
+; RV32XTHEADBB-NEXT: # %bb.21:
; RV32XTHEADBB-NEXT: mv a3, a2
-; RV32XTHEADBB-NEXT: .LBB23_24:
+; RV32XTHEADBB-NEXT: .LBB23_22:
; RV32XTHEADBB-NEXT: or a2, a0, a6
; RV32XTHEADBB-NEXT: or a3, a4, a3
; RV32XTHEADBB-NEXT: add a0, a5, a2
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
index 073cd4e01e003..da95481a5e588 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
@@ -219,43 +219,44 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: bltu a5, a4, .LBB9_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: srl a6, a1, a5
-; CHECK-NEXT: j .LBB9_3
+; CHECK-NEXT: mv a3, a0
+; CHECK-NEXT: bnez a5, .LBB9_3
+; CHECK-NEXT: j .LBB9_4
; CHECK-NEXT: .LBB9_2:
; CHECK-NEXT: srl a3, a0, a2
; CHECK-NEXT: neg a6, a5
; CHECK-NEXT: sll a6, a1, a6
; CHECK-NEXT: or a6, a3, a6
-; CHECK-NEXT: .LBB9_3:
; CHECK-NEXT: mv a3, a0
-; CHECK-NEXT: beqz a5, .LBB9_5
-; CHECK-NEXT: # %bb.4:
+; CHECK-NEXT: beqz a5, .LBB9_4
+; CHECK-NEXT: .LBB9_3:
; CHECK-NEXT: mv a3, a6
-; CHECK-NEXT: .LBB9_5:
+; CHECK-NEXT: .LBB9_4:
; CHECK-NEXT: neg a6, a2
-; CHECK-NEXT: bltu a5, a4, .LBB9_9
-; CHECK-NEXT: # %bb.6:
+; CHECK-NEXT: bltu a5, a4, .LBB9_7
+; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: .LBB9_7:
; CHECK-NEXT: andi a5, a6, 63
-; CHECK-NEXT: bgeu a5, a4, .LBB9_10
-; CHECK-NEXT: # %bb.8:
+; CHECK-NEXT: bgeu a5, a4, .LBB9_8
+; CHECK-NEXT: .LBB9_6:
; CHECK-NEXT: sll a4, a0, a6
; CHECK-NEXT: neg a7, a5
; CHECK-NEXT: srl a0, a0, a7
; CHECK-NEXT: sll a6, a1, a6
; CHECK-NEXT: or a0, a0, a6
-; CHECK-NEXT: bnez a5, .LBB9_11
-; CHECK-NEXT: j .LBB9_12
-; CHECK-NEXT: .LBB9_9:
+; CHECK-NEXT: bnez a5, .LBB9_9
+; CHECK-NEXT: j .LBB9_10
+; CHECK-NEXT: .LBB9_7:
; CHECK-NEXT: srl a2, a1, a2
-; CHECK-NEXT: j .LBB9_7
-; CHECK-NEXT: .LBB9_10:
+; CHECK-NEXT: andi a5, a6, 63
+; CHECK-NEXT: bltu a5, a4, .LBB9_6
+; CHECK-NEXT: .LBB9_8:
; CHECK-NEXT: li a4, 0
; CHECK-NEXT: sll a0, a0, a5
-; CHECK-NEXT: beqz a5, .LBB9_12
-; CHECK-NEXT: .LBB9_11:
+; CHECK-NEXT: beqz a5, .LBB9_10
+; CHECK-NEXT: .LBB9_9:
; CHECK-NEXT: mv a1, a0
-; CHECK-NEXT: .LBB9_12:
+; CHECK-NEXT: .LBB9_10:
; CHECK-NEXT: or a0, a3, a4
; CHECK-NEXT: or a1, a2, a1
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll
index 15faf278080e3..8b262db56ccd2 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll
@@ -221,94 +221,95 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: bltu a2, t0, .LBB6_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srl a5, a7, a2
-; RV32I-NEXT: j .LBB6_3
+; RV32I-NEXT: mv a4, a3
+; RV32I-NEXT: bnez a2, .LBB6_3
+; RV32I-NEXT: j .LBB6_4
; RV32I-NEXT: .LBB6_2:
; RV32I-NEXT: or a5, t2, t5
-; RV32I-NEXT: .LBB6_3:
; RV32I-NEXT: mv a4, a3
-; RV32I-NEXT: beqz a2, .LBB6_5
-; RV32I-NEXT: # %bb.4:
+; RV32I-NEXT: beqz a2, .LBB6_4
+; RV32I-NEXT: .LBB6_3:
; RV32I-NEXT: mv a4, a5
-; RV32I-NEXT: .LBB6_5:
+; RV32I-NEXT: .LBB6_4:
; RV32I-NEXT: lw a5, 0(a1)
; RV32I-NEXT: lw a1, 4(a1)
-; RV32I-NEXT: bltu a2, t0, .LBB6_7
-; RV32I-NEXT: # %bb.6:
+; RV32I-NEXT: bltu a2, t0, .LBB6_6
+; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: li a6, 0
; RV32I-NEXT: srl t4, a1, a2
-; RV32I-NEXT: j .LBB6_8
-; RV32I-NEXT: .LBB6_7:
+; RV32I-NEXT: j .LBB6_7
+; RV32I-NEXT: .LBB6_6:
; RV32I-NEXT: srl a6, a7, a2
; RV32I-NEXT: srl t1, a5, a2
; RV32I-NEXT: sll t3, a1, t6
; RV32I-NEXT: or t4, t1, t3
-; RV32I-NEXT: .LBB6_8:
+; RV32I-NEXT: .LBB6_7:
; RV32I-NEXT: li t1, 64
; RV32I-NEXT: mv t3, a5
-; RV32I-NEXT: beqz a2, .LBB6_10
-; RV32I-NEXT: # %bb.9:
+; RV32I-NEXT: beqz a2, .LBB6_9
+; RV32I-NEXT: # %bb.8:
; RV32I-NEXT: mv t3, t4
-; RV32I-NEXT: .LBB6_10:
+; RV32I-NEXT: .LBB6_9:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: sub s0, t1, a2
-; RV32I-NEXT: bltu a2, t0, .LBB6_13
-; RV32I-NEXT: # %bb.11:
+; RV32I-NEXT: bltu a2, t0, .LBB6_12
+; RV32I-NEXT: # %bb.10:
; RV32I-NEXT: li t4, 0
-; RV32I-NEXT: bgeu s0, t0, .LBB6_14
-; RV32I-NEXT: .LBB6_12:
+; RV32I-NEXT: bgeu s0, t0, .LBB6_13
+; RV32I-NEXT: .LBB6_11:
; RV32I-NEXT: sll t6, a3, t6
; RV32I-NEXT: neg s1, s0
; RV32I-NEXT: srl s1, a3, s1
; RV32I-NEXT: or s2, s1, t5
-; RV32I-NEXT: j .LBB6_15
-; RV32I-NEXT: .LBB6_13:
+; RV32I-NEXT: j .LBB6_14
+; RV32I-NEXT: .LBB6_12:
; RV32I-NEXT: srl t4, a1, a2
-; RV32I-NEXT: bltu s0, t0, .LBB6_12
-; RV32I-NEXT: .LBB6_14:
+; RV32I-NEXT: bltu s0, t0, .LBB6_11
+; RV32I-NEXT: .LBB6_13:
; RV32I-NEXT: li t6, 0
; RV32I-NEXT: sll s2, a3, s0
-; RV32I-NEXT: .LBB6_15:
+; RV32I-NEXT: .LBB6_14:
; RV32I-NEXT: addi s1, a2, -64
; RV32I-NEXT: mv t5, a7
-; RV32I-NEXT: beqz s0, .LBB6_17
-; RV32I-NEXT: # %bb.16:
+; RV32I-NEXT: beqz s0, .LBB6_16
+; RV32I-NEXT: # %bb.15:
; RV32I-NEXT: mv t5, s2
-; RV32I-NEXT: .LBB6_17:
-; RV32I-NEXT: bltu s1, t0, .LBB6_19
-; RV32I-NEXT: # %bb.18:
+; RV32I-NEXT: .LBB6_16:
+; RV32I-NEXT: bltu s1, t0, .LBB6_18
+; RV32I-NEXT: # %bb.17:
; RV32I-NEXT: srl t2, a7, s1
-; RV32I-NEXT: bnez s1, .LBB6_20
-; RV32I-NEXT: j .LBB6_21
-; RV32I-NEXT: .LBB6_19:
+; RV32I-NEXT: bnez s1, .LBB6_19
+; RV32I-NEXT: j .LBB6_20
+; RV32I-NEXT: .LBB6_18:
; RV32I-NEXT: neg s0, s1
; RV32I-NEXT: sll s0, a7, s0
; RV32I-NEXT: or t2, t2, s0
-; RV32I-NEXT: beqz s1, .LBB6_21
-; RV32I-NEXT: .LBB6_20:
+; RV32I-NEXT: beqz s1, .LBB6_20
+; RV32I-NEXT: .LBB6_19:
; RV32I-NEXT: mv a3, t2
-; RV32I-NEXT: .LBB6_21:
-; RV32I-NEXT: bltu s1, t0, .LBB6_23
-; RV32I-NEXT: # %bb.22:
+; RV32I-NEXT: .LBB6_20:
+; RV32I-NEXT: bltu s1, t0, .LBB6_22
+; RV32I-NEXT: # %bb.21:
; RV32I-NEXT: li a7, 0
-; RV32I-NEXT: bltu a2, t1, .LBB6_24
-; RV32I-NEXT: j .LBB6_25
-; RV32I-NEXT: .LBB6_23:
+; RV32I-NEXT: bltu a2, t1, .LBB6_23
+; RV32I-NEXT: j .LBB6_24
+; RV32I-NEXT: .LBB6_22:
; RV32I-NEXT: srl a7, a7, a2
-; RV32I-NEXT: bgeu a2, t1, .LBB6_25
-; RV32I-NEXT: .LBB6_24:
+; RV32I-NEXT: bgeu a2, t1, .LBB6_24
+; RV32I-NEXT: .LBB6_23:
; RV32I-NEXT: or a3, t3, t6
; RV32I-NEXT: or a7, t4, t5
-; RV32I-NEXT: .LBB6_25:
-; RV32I-NEXT: bnez a2, .LBB6_29
-; RV32I-NEXT: # %bb.26:
-; RV32I-NEXT: bltu a2, t1, .LBB6_28
-; RV32I-NEXT: .LBB6_27:
+; RV32I-NEXT: .LBB6_24:
+; RV32I-NEXT: bnez a2, .LBB6_28
+; RV32I-NEXT: # %bb.25:
+; RV32I-NEXT: bltu a2, t1, .LBB6_27
+; RV32I-NEXT: .LBB6_26:
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a6, 0
-; RV32I-NEXT: .LBB6_28:
+; RV32I-NEXT: .LBB6_27:
; RV32I-NEXT: sw a5, 0(a0)
; RV32I-NEXT: sw a1, 4(a0)
; RV32I-NEXT: sw a4, 8(a0)
@@ -318,11 +319,11 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB6_29:
+; RV32I-NEXT: .LBB6_28:
; RV32I-NEXT: mv a5, a3
; RV32I-NEXT: mv a1, a7
-; RV32I-NEXT: bgeu a2, t1, .LBB6_27
-; RV32I-NEXT: j .LBB6_28
+; RV32I-NEXT: bgeu a2, t1, .LBB6_26
+; RV32I-NEXT: j .LBB6_27
;
; RV64I-LABEL: lshr128:
; RV64I: # %bb.0:
@@ -366,94 +367,95 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: bltu a2, t0, .LBB7_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sra a6, a3, a2
-; RV32I-NEXT: j .LBB7_3
+; RV32I-NEXT: mv a5, a4
+; RV32I-NEXT: bnez a2, .LBB7_3
+; RV32I-NEXT: j .LBB7_4
; RV32I-NEXT: .LBB7_2:
; RV32I-NEXT: or a6, t2, t5
-; RV32I-NEXT: .LBB7_3:
; RV32I-NEXT: mv a5, a4
-; RV32I-NEXT: beqz a2, .LBB7_5
-; RV32I-NEXT: # %bb.4:
+; RV32I-NEXT: beqz a2, .LBB7_4
+; RV32I-NEXT: .LBB7_3:
; RV32I-NEXT: mv a5, a6
-; RV32I-NEXT: .LBB7_5:
+; RV32I-NEXT: .LBB7_4:
; RV32I-NEXT: lw a6, 0(a1)
; RV32I-NEXT: lw a1, 4(a1)
-; RV32I-NEXT: bltu a2, t0, .LBB7_7
-; RV32I-NEXT: # %bb.6:
+; RV32I-NEXT: bltu a2, t0, .LBB7_6
+; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: srai a7, a3, 31
; RV32I-NEXT: srl t4, a1, a2
-; RV32I-NEXT: j .LBB7_8
-; RV32I-NEXT: .LBB7_7:
+; RV32I-NEXT: j .LBB7_7
+; RV32I-NEXT: .LBB7_6:
; RV32I-NEXT: sra a7, a3, a2
; RV32I-NEXT: srl t1, a6, a2
; RV32I-NEXT: sll t3, a1, t6
; RV32I-NEXT: or t4, t1, t3
-; RV32I-NEXT: .LBB7_8:
+; RV32I-NEXT: .LBB7_7:
; RV32I-NEXT: li t1, 64
; RV32I-NEXT: mv t3, a6
-; RV32I-NEXT: beqz a2, .LBB7_10
-; RV32I-NEXT: # %bb.9:
+; RV32I-NEXT: beqz a2, .LBB7_9
+; RV32I-NEXT: # %bb.8:
; RV32I-NEXT: mv t3, t4
-; RV32I-NEXT: .LBB7_10:
+; RV32I-NEXT: .LBB7_9:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: sub s0, t1, a2
-; RV32I-NEXT: bltu a2, t0, .LBB7_13
-; RV32I-NEXT: # %bb.11:
+; RV32I-NEXT: bltu a2, t0, .LBB7_12
+; RV32I-NEXT: # %bb.10:
; RV32I-NEXT: li t4, 0
-; RV32I-NEXT: bgeu s0, t0, .LBB7_14
-; RV32I-NEXT: .LBB7_12:
+; RV32I-NEXT: bgeu s0, t0, .LBB7_13
+; RV32I-NEXT: .LBB7_11:
; RV32I-NEXT: sll t6, a4, t6
; RV32I-NEXT: neg s1, s0
; RV32I-NEXT: srl s1, a4, s1
; RV32I-NEXT: or s2, s1, t5
-; RV32I-NEXT: j .LBB7_15
-; RV32I-NEXT: .LBB7_13:
+; RV32I-NEXT: j .LBB7_14
+; RV32I-NEXT: .LBB7_12:
; RV32I-NEXT: srl t4, a1, a2
-; RV32I-NEXT: bltu s0, t0, .LBB7_12
-; RV32I-NEXT: .LBB7_14:
+; RV32I-NEXT: bltu s0, t0, .LBB7_11
+; RV32I-NEXT: .LBB7_13:
; RV32I-NEXT: li t6, 0
; RV32I-NEXT: sll s2, a4, s0
-; RV32I-NEXT: .LBB7_15:
+; RV32I-NEXT: .LBB7_14:
; RV32I-NEXT: addi s1, a2, -64
; RV32I-NEXT: mv t5, a3
-; RV32I-NEXT: beqz s0, .LBB7_17
-; RV32I-NEXT: # %bb.16:
+; RV32I-NEXT: beqz s0, .LBB7_16
+; RV32I-NEXT: # %bb.15:
; RV32I-NEXT: mv t5, s2
-; RV32I-NEXT: .LBB7_17:
-; RV32I-NEXT: bltu s1, t0, .LBB7_19
-; RV32I-NEXT: # %bb.18:
+; RV32I-NEXT: .LBB7_16:
+; RV32I-NEXT: bltu s1, t0, .LBB7_18
+; RV32I-NEXT: # %bb.17:
; RV32I-NEXT: sra t2, a3, s1
-; RV32I-NEXT: bnez s1, .LBB7_20
-; RV32I-NEXT: j .LBB7_21
-; RV32I-NEXT: .LBB7_19:
+; RV32I-NEXT: bnez s1, .LBB7_19
+; RV32I-NEXT: j .LBB7_20
+; RV32I-NEXT: .LBB7_18:
; RV32I-NEXT: neg s0, s1
; RV32I-NEXT: sll s0, a3, s0
; RV32I-NEXT: or t2, t2, s0
-; RV32I-NEXT: beqz s1, .LBB7_21
-; RV32I-NEXT: .LBB7_20:
+; RV32I-NEXT: beqz s1, .LBB7_20
+; RV32I-NEXT: .LBB7_19:
; RV32I-NEXT: mv a4, t2
-; RV32I-NEXT: .LBB7_21:
-; RV32I-NEXT: bltu s1, t0, .LBB7_23
-; RV32I-NEXT: # %bb.22:
+; RV32I-NEXT: .LBB7_20:
+; RV32I-NEXT: bltu s1, t0, .LBB7_22
+; RV32I-NEXT: # %bb.21:
; RV32I-NEXT: srai t0, a3, 31
-; RV32I-NEXT: bltu a2, t1, .LBB7_24
-; RV32I-NEXT: j .LBB7_25
-; RV32I-NEXT: .LBB7_23:
+; RV32I-NEXT: bltu a2, t1, .LBB7_23
+; RV32I-NEXT: j .LBB7_24
+; RV32I-NEXT: .LBB7_22:
; RV32I-NEXT: sra t0, a3, a2
-; RV32I-NEXT: bgeu a2, t1, .LBB7_25
-; RV32I-NEXT: .LBB7_24:
+; RV32I-NEXT: bgeu a2, t1, .LBB7_24
+; RV32I-NEXT: .LBB7_23:
; RV32I-NEXT: or a4, t3, t6
; RV32I-NEXT: or t0, t4, t5
-; RV32I-NEXT: .LBB7_25:
-; RV32I-NEXT: bnez a2, .LBB7_29
-; RV32I-NEXT: # %bb.26:
-; RV32I-NEXT: bltu a2, t1, .LBB7_28
-; RV32I-NEXT: .LBB7_27:
+; RV32I-NEXT: .LBB7_24:
+; RV32I-NEXT: bnez a2, .LBB7_28
+; RV32I-NEXT: # %bb.25:
+; RV32I-NEXT: bltu a2, t1, .LBB7_27
+; RV32I-NEXT: .LBB7_26:
; RV32I-NEXT: srai a5, a3, 31
; RV32I-NEXT: mv a7, a5
-; RV32I-NEXT: .LBB7_28:
+; RV32I-NEXT: .LBB7_27:
; RV32I-NEXT: sw a6, 0(a0)
; RV32I-NEXT: sw a1, 4(a0)
; RV32I-NEXT: sw a5, 8(a0)
@@ -463,11 +465,11 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB7_29:
+; RV32I-NEXT: .LBB7_28:
; RV32I-NEXT: mv a6, a4
; RV32I-NEXT: mv a1, t0
-; RV32I-NEXT: bgeu a2, t1, .LBB7_27
-; RV32I-NEXT: j .LBB7_28
+; RV32I-NEXT: bgeu a2, t1, .LBB7_26
+; RV32I-NEXT: j .LBB7_27
;
; RV64I-LABEL: ashr128:
; RV64I: # %bb.0:
@@ -527,75 +529,76 @@ define i128 @shl128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: bltu t4, t1, .LBB8_7
; RV32I-NEXT: # %bb.6:
; RV32I-NEXT: srl t2, a3, t4
-; RV32I-NEXT: j .LBB8_8
+; RV32I-NEXT: mv t3, a7
+; RV32I-NEXT: bnez t4, .LBB8_8
+; RV32I-NEXT: j .LBB8_9
; RV32I-NEXT: .LBB8_7:
; RV32I-NEXT: neg t3, t4
; RV32I-NEXT: sll t3, a3, t3
; RV32I-NEXT: or t2, t2, t3
-; RV32I-NEXT: .LBB8_8:
; RV32I-NEXT: mv t3, a7
-; RV32I-NEXT: beqz t4, .LBB8_10
-; RV32I-NEXT: # %bb.9:
+; RV32I-NEXT: beqz t4, .LBB8_9
+; RV32I-NEXT: .LBB8_8:
; RV32I-NEXT: mv t3, t2
-; RV32I-NEXT: .LBB8_10:
-; RV32I-NEXT: bltu t4, t1, .LBB8_12
-; RV32I-NEXT: # %bb.11:
+; RV32I-NEXT: .LBB8_9:
+; RV32I-NEXT: bltu t4, t1, .LBB8_11
+; RV32I-NEXT: # %bb.10:
; RV32I-NEXT: li t4, 0
-; RV32I-NEXT: j .LBB8_13
-; RV32I-NEXT: .LBB8_12:
+; RV32I-NEXT: j .LBB8_12
+; RV32I-NEXT: .LBB8_11:
; RV32I-NEXT: srl t4, a3, t5
-; RV32I-NEXT: .LBB8_13:
+; RV32I-NEXT: .LBB8_12:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: lw t2, 8(a1)
; RV32I-NEXT: lw a1, 12(a1)
-; RV32I-NEXT: bltu a2, t1, .LBB8_15
-; RV32I-NEXT: # %bb.14:
+; RV32I-NEXT: bltu a2, t1, .LBB8_14
+; RV32I-NEXT: # %bb.13:
; RV32I-NEXT: li t6, 0
; RV32I-NEXT: sll s1, t2, a2
-; RV32I-NEXT: j .LBB8_16
-; RV32I-NEXT: .LBB8_15:
+; RV32I-NEXT: j .LBB8_15
+; RV32I-NEXT: .LBB8_14:
; RV32I-NEXT: sll t6, t2, a2
; RV32I-NEXT: srl t5, t2, t5
; RV32I-NEXT: sll s0, a1, a2
; RV32I-NEXT: or s1, t5, s0
-; RV32I-NEXT: .LBB8_16:
+; RV32I-NEXT: .LBB8_15:
; RV32I-NEXT: addi s0, a2, -64
; RV32I-NEXT: mv t5, a1
-; RV32I-NEXT: beqz a2, .LBB8_18
-; RV32I-NEXT: # %bb.17:
+; RV32I-NEXT: beqz a2, .LBB8_17
+; RV32I-NEXT: # %bb.16:
; RV32I-NEXT: mv t5, s1
-; RV32I-NEXT: .LBB8_18:
-; RV32I-NEXT: bltu s0, t1, .LBB8_20
-; RV32I-NEXT: # %bb.19:
+; RV32I-NEXT: .LBB8_17:
+; RV32I-NEXT: bltu s0, t1, .LBB8_19
+; RV32I-NEXT: # %bb.18:
; RV32I-NEXT: li t1, 0
; RV32I-NEXT: sll a7, a7, s0
-; RV32I-NEXT: bnez s0, .LBB8_21
-; RV32I-NEXT: j .LBB8_22
-; RV32I-NEXT: .LBB8_20:
+; RV32I-NEXT: bnez s0, .LBB8_20
+; RV32I-NEXT: j .LBB8_21
+; RV32I-NEXT: .LBB8_19:
; RV32I-NEXT: sll t1, a7, a2
; RV32I-NEXT: neg s1, s0
; RV32I-NEXT: srl a7, a7, s1
; RV32I-NEXT: or a7, a7, t0
-; RV32I-NEXT: beqz s0, .LBB8_22
-; RV32I-NEXT: .LBB8_21:
+; RV32I-NEXT: beqz s0, .LBB8_21
+; RV32I-NEXT: .LBB8_20:
; RV32I-NEXT: mv a3, a7
-; RV32I-NEXT: .LBB8_22:
-; RV32I-NEXT: bltu a2, a6, .LBB8_24
-; RV32I-NEXT: # %bb.23:
+; RV32I-NEXT: .LBB8_21:
+; RV32I-NEXT: bltu a2, a6, .LBB8_23
+; RV32I-NEXT: # %bb.22:
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
-; RV32I-NEXT: bnez a2, .LBB8_25
-; RV32I-NEXT: j .LBB8_26
-; RV32I-NEXT: .LBB8_24:
+; RV32I-NEXT: bnez a2, .LBB8_24
+; RV32I-NEXT: j .LBB8_25
+; RV32I-NEXT: .LBB8_23:
; RV32I-NEXT: or t1, t3, t6
; RV32I-NEXT: or a3, t4, t5
-; RV32I-NEXT: beqz a2, .LBB8_26
-; RV32I-NEXT: .LBB8_25:
+; RV32I-NEXT: beqz a2, .LBB8_25
+; RV32I-NEXT: .LBB8_24:
; RV32I-NEXT: mv t2, t1
; RV32I-NEXT: mv a1, a3
-; RV32I-NEXT: .LBB8_26:
+; RV32I-NEXT: .LBB8_25:
; RV32I-NEXT: sw a4, 0(a0)
; RV32I-NEXT: sw a5, 4(a0)
; RV32I-NEXT: sw t2, 8(a0)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll
index fd9fb326990a3..bc002fee4417c 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll
@@ -3210,271 +3210,278 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: # %bb.38:
; RV32I-NEXT: li s8, 0
; RV32I-NEXT: srl a3, a0, a4
-; RV32I-NEXT: j .LBB12_40
+; RV32I-NEXT: mv a5, s0
+; RV32I-NEXT: bnez a4, .LBB12_40
+; RV32I-NEXT: j .LBB12_41
; RV32I-NEXT: .LBB12_39:
; RV32I-NEXT: srl s8, t2, a4
; RV32I-NEXT: srl a3, s0, a4
; RV32I-NEXT: sll a5, a0, s6
; RV32I-NEXT: or a3, a3, a5
-; RV32I-NEXT: .LBB12_40:
; RV32I-NEXT: mv a5, s0
-; RV32I-NEXT: beqz a4, .LBB12_42
-; RV32I-NEXT: # %bb.41:
+; RV32I-NEXT: beqz a4, .LBB12_41
+; RV32I-NEXT: .LBB12_40:
; RV32I-NEXT: mv a5, a3
-; RV32I-NEXT: .LBB12_42:
-; RV32I-NEXT: bltu a4, t3, .LBB12_45
-; RV32I-NEXT: # %bb.43:
+; RV32I-NEXT: .LBB12_41:
+; RV32I-NEXT: bltu a4, t3, .LBB12_44
+; RV32I-NEXT: # %bb.42:
; RV32I-NEXT: li s1, 0
-; RV32I-NEXT: bgeu s7, t3, .LBB12_46
-; RV32I-NEXT: .LBB12_44:
+; RV32I-NEXT: bgeu s7, t3, .LBB12_45
+; RV32I-NEXT: .LBB12_43:
; RV32I-NEXT: sll s3, t4, s6
; RV32I-NEXT: srl a3, t4, s10
; RV32I-NEXT: or a3, a3, ra
+; RV32I-NEXT: mv s10, t2
+; RV32I-NEXT: bnez s7, .LBB12_46
; RV32I-NEXT: j .LBB12_47
-; RV32I-NEXT: .LBB12_45:
+; RV32I-NEXT: .LBB12_44:
; RV32I-NEXT: srl s1, a0, a4
-; RV32I-NEXT: bltu s7, t3, .LBB12_44
-; RV32I-NEXT: .LBB12_46:
+; RV32I-NEXT: bltu s7, t3, .LBB12_43
+; RV32I-NEXT: .LBB12_45:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t4, s7
-; RV32I-NEXT: .LBB12_47:
; RV32I-NEXT: mv s10, t2
-; RV32I-NEXT: beqz s7, .LBB12_49
-; RV32I-NEXT: # %bb.48:
+; RV32I-NEXT: beqz s7, .LBB12_47
+; RV32I-NEXT: .LBB12_46:
; RV32I-NEXT: mv s10, a3
-; RV32I-NEXT: .LBB12_49:
-; RV32I-NEXT: bltu s9, t3, .LBB12_51
-; RV32I-NEXT: # %bb.50:
+; RV32I-NEXT: .LBB12_47:
+; RV32I-NEXT: bltu s9, t3, .LBB12_49
+; RV32I-NEXT: # %bb.48:
; RV32I-NEXT: srl a3, t2, s9
-; RV32I-NEXT: j .LBB12_52
-; RV32I-NEXT: .LBB12_51:
+; RV32I-NEXT: mv s2, t4
+; RV32I-NEXT: bnez s9, .LBB12_50
+; RV32I-NEXT: j .LBB12_51
+; RV32I-NEXT: .LBB12_49:
; RV32I-NEXT: sll a3, t2, s11
; RV32I-NEXT: or a3, s2, a3
-; RV32I-NEXT: .LBB12_52:
; RV32I-NEXT: mv s2, t4
-; RV32I-NEXT: beqz s9, .LBB12_54
-; RV32I-NEXT: # %bb.53:
+; RV32I-NEXT: beqz s9, .LBB12_51
+; RV32I-NEXT: .LBB12_50:
; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: .LBB12_54:
-; RV32I-NEXT: bltu s9, t3, .LBB12_56
-; RV32I-NEXT: # %bb.55:
+; RV32I-NEXT: .LBB12_51:
+; RV32I-NEXT: bltu s9, t3, .LBB12_53
+; RV32I-NEXT: # %bb.52:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: bltu a4, t6, .LBB12_57
-; RV32I-NEXT: j .LBB12_58
-; RV32I-NEXT: .LBB12_56:
+; RV32I-NEXT: bltu a4, t6, .LBB12_54
+; RV32I-NEXT: j .LBB12_55
+; RV32I-NEXT: .LBB12_53:
; RV32I-NEXT: srl s7, t2, a4
-; RV32I-NEXT: bgeu a4, t6, .LBB12_58
-; RV32I-NEXT: .LBB12_57:
+; RV32I-NEXT: bgeu a4, t6, .LBB12_55
+; RV32I-NEXT: .LBB12_54:
; RV32I-NEXT: or s2, a5, s3
; RV32I-NEXT: or s7, s1, s10
-; RV32I-NEXT: .LBB12_58:
+; RV32I-NEXT: .LBB12_55:
; RV32I-NEXT: li a3, 128
; RV32I-NEXT: mv a5, s0
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: beqz a4, .LBB12_60
-; RV32I-NEXT: # %bb.59:
+; RV32I-NEXT: beqz a4, .LBB12_57
+; RV32I-NEXT: # %bb.56:
; RV32I-NEXT: mv a5, s2
; RV32I-NEXT: mv s1, s7
-; RV32I-NEXT: .LBB12_60:
+; RV32I-NEXT: .LBB12_57:
; RV32I-NEXT: sw a5, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sub s2, a3, a4
-; RV32I-NEXT: bltu a4, t6, .LBB12_62
-; RV32I-NEXT: # %bb.61:
+; RV32I-NEXT: bltu a4, t6, .LBB12_59
+; RV32I-NEXT: # %bb.58:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: li s8, 0
-; RV32I-NEXT: .LBB12_62:
+; RV32I-NEXT: .LBB12_59:
; RV32I-NEXT: neg s3, s2
; RV32I-NEXT: srl a5, t0, s3
; RV32I-NEXT: sw s8, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s2, t3, .LBB12_64
-; RV32I-NEXT: # %bb.63:
+; RV32I-NEXT: bltu s2, t3, .LBB12_61
+; RV32I-NEXT: # %bb.60:
; RV32I-NEXT: li s10, 0
; RV32I-NEXT: sll a3, t0, s2
-; RV32I-NEXT: j .LBB12_65
-; RV32I-NEXT: .LBB12_64:
+; RV32I-NEXT: j .LBB12_62
+; RV32I-NEXT: .LBB12_61:
; RV32I-NEXT: sll s10, t0, s6
; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a5, a3
-; RV32I-NEXT: .LBB12_65:
+; RV32I-NEXT: .LBB12_62:
; RV32I-NEXT: sub s1, t6, s2
; RV32I-NEXT: mv s8, a6
-; RV32I-NEXT: beqz s2, .LBB12_67
-; RV32I-NEXT: # %bb.66:
+; RV32I-NEXT: beqz s2, .LBB12_64
+; RV32I-NEXT: # %bb.63:
; RV32I-NEXT: mv s8, a3
-; RV32I-NEXT: .LBB12_67:
-; RV32I-NEXT: bltu s1, t3, .LBB12_69
-; RV32I-NEXT: # %bb.68:
+; RV32I-NEXT: .LBB12_64:
+; RV32I-NEXT: bltu s1, t3, .LBB12_66
+; RV32I-NEXT: # %bb.65:
; RV32I-NEXT: srl a3, a6, s1
-; RV32I-NEXT: j .LBB12_70
-; RV32I-NEXT: .LBB12_69:
+; RV32I-NEXT: mv a5, t0
+; RV32I-NEXT: bnez s1, .LBB12_67
+; RV32I-NEXT: j .LBB12_68
+; RV32I-NEXT: .LBB12_66:
; RV32I-NEXT: neg a3, s1
; RV32I-NEXT: sll a3, a6, a3
; RV32I-NEXT: or a3, a5, a3
-; RV32I-NEXT: .LBB12_70:
; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: beqz s1, .LBB12_72
-; RV32I-NEXT: # %bb.71:
+; RV32I-NEXT: beqz s1, .LBB12_68
+; RV32I-NEXT: .LBB12_67:
; RV32I-NEXT: mv a5, a3
-; RV32I-NEXT: .LBB12_72:
-; RV32I-NEXT: bltu s1, t3, .LBB12_76
-; RV32I-NEXT: # %bb.73:
+; RV32I-NEXT: .LBB12_68:
+; RV32I-NEXT: bltu s1, t3, .LBB12_71
+; RV32I-NEXT: # %bb.69:
; RV32I-NEXT: li s1, 0
-; RV32I-NEXT: .LBB12_74:
; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bgeu s2, t3, .LBB12_77
-; RV32I-NEXT: # %bb.75:
+; RV32I-NEXT: bgeu s2, t3, .LBB12_72
+; RV32I-NEXT: .LBB12_70:
; RV32I-NEXT: sll s6, t1, s6
; RV32I-NEXT: srl a3, t1, s3
; RV32I-NEXT: lw s3, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: j .LBB12_78
-; RV32I-NEXT: .LBB12_76:
+; RV32I-NEXT: j .LBB12_73
+; RV32I-NEXT: .LBB12_71:
; RV32I-NEXT: srl s1, a6, s3
-; RV32I-NEXT: j .LBB12_74
-; RV32I-NEXT: .LBB12_77:
+; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: bltu s2, t3, .LBB12_70
+; RV32I-NEXT: .LBB12_72:
; RV32I-NEXT: li s6, 0
; RV32I-NEXT: sll a3, t1, s2
-; RV32I-NEXT: .LBB12_78:
+; RV32I-NEXT: .LBB12_73:
; RV32I-NEXT: addi s9, s2, -64
; RV32I-NEXT: mv s5, t5
-; RV32I-NEXT: beqz s2, .LBB12_80
-; RV32I-NEXT: # %bb.79:
+; RV32I-NEXT: beqz s2, .LBB12_75
+; RV32I-NEXT: # %bb.74:
; RV32I-NEXT: mv s5, a3
-; RV32I-NEXT: .LBB12_80:
-; RV32I-NEXT: bltu s9, t3, .LBB12_82
-; RV32I-NEXT: # %bb.81:
+; RV32I-NEXT: .LBB12_75:
+; RV32I-NEXT: bltu s9, t3, .LBB12_77
+; RV32I-NEXT: # %bb.76:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t0, s9
-; RV32I-NEXT: j .LBB12_83
-; RV32I-NEXT: .LBB12_82:
+; RV32I-NEXT: mv s7, a6
+; RV32I-NEXT: bnez s9, .LBB12_78
+; RV32I-NEXT: j .LBB12_79
+; RV32I-NEXT: .LBB12_77:
; RV32I-NEXT: sll s3, t0, s2
; RV32I-NEXT: neg a3, s9
; RV32I-NEXT: srl a3, t0, a3
; RV32I-NEXT: sll s7, a6, s2
; RV32I-NEXT: or a3, a3, s7
-; RV32I-NEXT: .LBB12_83:
; RV32I-NEXT: mv s7, a6
-; RV32I-NEXT: beqz s9, .LBB12_85
-; RV32I-NEXT: # %bb.84:
+; RV32I-NEXT: beqz s9, .LBB12_79
+; RV32I-NEXT: .LBB12_78:
; RV32I-NEXT: mv s7, a3
-; RV32I-NEXT: .LBB12_85:
-; RV32I-NEXT: bltu s2, t6, .LBB12_87
-; RV32I-NEXT: # %bb.86:
+; RV32I-NEXT: .LBB12_79:
+; RV32I-NEXT: bltu s2, t6, .LBB12_81
+; RV32I-NEXT: # %bb.80:
; RV32I-NEXT: sw zero, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: li s8, 0
-; RV32I-NEXT: j .LBB12_88
-; RV32I-NEXT: .LBB12_87:
+; RV32I-NEXT: j .LBB12_82
+; RV32I-NEXT: .LBB12_81:
; RV32I-NEXT: sw s10, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: or s3, a5, s6
; RV32I-NEXT: or s7, s1, s5
-; RV32I-NEXT: .LBB12_88:
+; RV32I-NEXT: .LBB12_82:
; RV32I-NEXT: addi ra, a4, -128
; RV32I-NEXT: mv s5, t1
; RV32I-NEXT: mv s6, t5
-; RV32I-NEXT: beqz s2, .LBB12_90
-; RV32I-NEXT: # %bb.89:
+; RV32I-NEXT: beqz s2, .LBB12_84
+; RV32I-NEXT: # %bb.83:
; RV32I-NEXT: mv s5, s3
; RV32I-NEXT: mv s6, s7
-; RV32I-NEXT: .LBB12_90:
+; RV32I-NEXT: .LBB12_84:
; RV32I-NEXT: neg s9, ra
; RV32I-NEXT: sll s3, t5, s9
-; RV32I-NEXT: bltu ra, t3, .LBB12_92
-; RV32I-NEXT: # %bb.91:
+; RV32I-NEXT: bltu ra, t3, .LBB12_86
+; RV32I-NEXT: # %bb.85:
; RV32I-NEXT: srl a3, t5, ra
-; RV32I-NEXT: j .LBB12_93
-; RV32I-NEXT: .LBB12_92:
+; RV32I-NEXT: mv s1, t1
+; RV32I-NEXT: bnez ra, .LBB12_87
+; RV32I-NEXT: j .LBB12_88
+; RV32I-NEXT: .LBB12_86:
; RV32I-NEXT: lw a3, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: .LBB12_93:
; RV32I-NEXT: mv s1, t1
-; RV32I-NEXT: beqz ra, .LBB12_95
-; RV32I-NEXT: # %bb.94:
+; RV32I-NEXT: beqz ra, .LBB12_88
+; RV32I-NEXT: .LBB12_87:
; RV32I-NEXT: mv s1, a3
-; RV32I-NEXT: .LBB12_95:
-; RV32I-NEXT: bltu ra, t3, .LBB12_97
-; RV32I-NEXT: # %bb.96:
+; RV32I-NEXT: .LBB12_88:
+; RV32I-NEXT: bltu ra, t3, .LBB12_90
+; RV32I-NEXT: # %bb.89:
; RV32I-NEXT: li s2, 0
; RV32I-NEXT: srl a3, a6, ra
-; RV32I-NEXT: j .LBB12_98
-; RV32I-NEXT: .LBB12_97:
+; RV32I-NEXT: mv a5, t0
+; RV32I-NEXT: bnez ra, .LBB12_91
+; RV32I-NEXT: j .LBB12_92
+; RV32I-NEXT: .LBB12_90:
; RV32I-NEXT: srl s2, t5, a4
; RV32I-NEXT: sll a3, a6, s9
; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a5, a3
-; RV32I-NEXT: .LBB12_98:
; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: beqz ra, .LBB12_100
-; RV32I-NEXT: # %bb.99:
+; RV32I-NEXT: beqz ra, .LBB12_92
+; RV32I-NEXT: .LBB12_91:
; RV32I-NEXT: mv a5, a3
-; RV32I-NEXT: .LBB12_100:
+; RV32I-NEXT: .LBB12_92:
; RV32I-NEXT: sub s10, t6, ra
-; RV32I-NEXT: bltu ra, t3, .LBB12_103
-; RV32I-NEXT: # %bb.101:
+; RV32I-NEXT: bltu ra, t3, .LBB12_95
+; RV32I-NEXT: # %bb.93:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: bgeu s10, t3, .LBB12_104
-; RV32I-NEXT: .LBB12_102:
+; RV32I-NEXT: bgeu s10, t3, .LBB12_96
+; RV32I-NEXT: .LBB12_94:
; RV32I-NEXT: sll s9, t1, s9
; RV32I-NEXT: neg a3, s10
; RV32I-NEXT: srl a3, t1, a3
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: j .LBB12_105
-; RV32I-NEXT: .LBB12_103:
+; RV32I-NEXT: j .LBB12_97
+; RV32I-NEXT: .LBB12_95:
; RV32I-NEXT: srl s7, a6, a4
-; RV32I-NEXT: bltu s10, t3, .LBB12_102
-; RV32I-NEXT: .LBB12_104:
+; RV32I-NEXT: bltu s10, t3, .LBB12_94
+; RV32I-NEXT: .LBB12_96:
; RV32I-NEXT: li s9, 0
; RV32I-NEXT: sll a3, t1, s10
-; RV32I-NEXT: .LBB12_105:
+; RV32I-NEXT: .LBB12_97:
; RV32I-NEXT: addi s11, ra, -64
; RV32I-NEXT: mv s3, t5
-; RV32I-NEXT: beqz s10, .LBB12_107
-; RV32I-NEXT: # %bb.106:
+; RV32I-NEXT: beqz s10, .LBB12_99
+; RV32I-NEXT: # %bb.98:
; RV32I-NEXT: mv s3, a3
-; RV32I-NEXT: .LBB12_107:
-; RV32I-NEXT: bltu s11, t3, .LBB12_109
-; RV32I-NEXT: # %bb.108:
+; RV32I-NEXT: .LBB12_99:
+; RV32I-NEXT: bltu s11, t3, .LBB12_101
+; RV32I-NEXT: # %bb.100:
; RV32I-NEXT: srl a3, t5, s11
-; RV32I-NEXT: bnez s11, .LBB12_110
-; RV32I-NEXT: j .LBB12_111
-; RV32I-NEXT: .LBB12_109:
+; RV32I-NEXT: bnez s11, .LBB12_102
+; RV32I-NEXT: j .LBB12_103
+; RV32I-NEXT: .LBB12_101:
; RV32I-NEXT: srl a3, t1, ra
; RV32I-NEXT: neg s10, s11
; RV32I-NEXT: sll s10, t5, s10
; RV32I-NEXT: or a3, a3, s10
-; RV32I-NEXT: beqz s11, .LBB12_111
-; RV32I-NEXT: .LBB12_110:
+; RV32I-NEXT: beqz s11, .LBB12_103
+; RV32I-NEXT: .LBB12_102:
; RV32I-NEXT: mv t1, a3
-; RV32I-NEXT: .LBB12_111:
-; RV32I-NEXT: bltu s11, t3, .LBB12_113
-; RV32I-NEXT: # %bb.112:
+; RV32I-NEXT: .LBB12_103:
+; RV32I-NEXT: bltu s11, t3, .LBB12_105
+; RV32I-NEXT: # %bb.104:
; RV32I-NEXT: li t3, 0
-; RV32I-NEXT: bltu ra, t6, .LBB12_114
-; RV32I-NEXT: j .LBB12_115
-; RV32I-NEXT: .LBB12_113:
+; RV32I-NEXT: bltu ra, t6, .LBB12_106
+; RV32I-NEXT: j .LBB12_107
+; RV32I-NEXT: .LBB12_105:
; RV32I-NEXT: srl t3, t5, ra
-; RV32I-NEXT: bgeu ra, t6, .LBB12_115
-; RV32I-NEXT: .LBB12_114:
+; RV32I-NEXT: bgeu ra, t6, .LBB12_107
+; RV32I-NEXT: .LBB12_106:
; RV32I-NEXT: or t1, a5, s9
; RV32I-NEXT: or t3, s7, s3
-; RV32I-NEXT: .LBB12_115:
+; RV32I-NEXT: .LBB12_107:
; RV32I-NEXT: li a5, 128
-; RV32I-NEXT: bnez ra, .LBB12_122
-; RV32I-NEXT: # %bb.116:
-; RV32I-NEXT: bgeu ra, t6, .LBB12_123
-; RV32I-NEXT: .LBB12_117:
-; RV32I-NEXT: bltu a4, a5, .LBB12_124
-; RV32I-NEXT: .LBB12_118:
+; RV32I-NEXT: bnez ra, .LBB12_114
+; RV32I-NEXT: # %bb.108:
+; RV32I-NEXT: bgeu ra, t6, .LBB12_115
+; RV32I-NEXT: .LBB12_109:
+; RV32I-NEXT: bltu a4, a5, .LBB12_116
+; RV32I-NEXT: .LBB12_110:
; RV32I-NEXT: lw ra, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: bnez a4, .LBB12_125
-; RV32I-NEXT: # %bb.119:
-; RV32I-NEXT: bltu a4, a5, .LBB12_121
-; RV32I-NEXT: .LBB12_120:
+; RV32I-NEXT: bnez a4, .LBB12_117
+; RV32I-NEXT: .LBB12_111:
+; RV32I-NEXT: bltu a4, a5, .LBB12_113
+; RV32I-NEXT: .LBB12_112:
; RV32I-NEXT: li s4, 0
; RV32I-NEXT: li a7, 0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: li ra, 0
-; RV32I-NEXT: .LBB12_121:
+; RV32I-NEXT: .LBB12_113:
; RV32I-NEXT: srli a4, s0, 16
; RV32I-NEXT: lui t1, 16
; RV32I-NEXT: srli t0, s0, 24
@@ -3556,15 +3563,15 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 96
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB12_122:
+; RV32I-NEXT: .LBB12_114:
; RV32I-NEXT: mv t0, t1
; RV32I-NEXT: mv a6, t3
-; RV32I-NEXT: bltu ra, t6, .LBB12_117
-; RV32I-NEXT: .LBB12_123:
+; RV32I-NEXT: bltu ra, t6, .LBB12_109
+; RV32I-NEXT: .LBB12_115:
; RV32I-NEXT: li s1, 0
; RV32I-NEXT: li s2, 0
-; RV32I-NEXT: bgeu a4, a5, .LBB12_118
-; RV32I-NEXT: .LBB12_124:
+; RV32I-NEXT: bgeu a4, a5, .LBB12_110
+; RV32I-NEXT: .LBB12_116:
; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a6, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or t0, a3, a6
@@ -3574,14 +3581,15 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: or s1, a3, s5
; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s2, a3, s6
-; RV32I-NEXT: j .LBB12_118
-; RV32I-NEXT: .LBB12_125:
+; RV32I-NEXT: lw ra, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: beqz a4, .LBB12_111
+; RV32I-NEXT: .LBB12_117:
; RV32I-NEXT: mv s0, t0
; RV32I-NEXT: mv a0, a6
; RV32I-NEXT: mv t4, s1
; RV32I-NEXT: mv t2, s2
-; RV32I-NEXT: bgeu a4, a5, .LBB12_120
-; RV32I-NEXT: j .LBB12_121
+; RV32I-NEXT: bgeu a4, a5, .LBB12_112
+; RV32I-NEXT: j .LBB12_113
%src = load i256, ptr %src.ptr, align 1
%byteOff = load i256, ptr %byteOff.ptr, align 1
%bitOff = shl i256 %byteOff, 3
@@ -4126,271 +4134,278 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: # %bb.38:
; RV32I-NEXT: li s8, 0
; RV32I-NEXT: srl a3, a0, a4
-; RV32I-NEXT: j .LBB13_40
+; RV32I-NEXT: mv a5, s0
+; RV32I-NEXT: bnez a4, .LBB13_40
+; RV32I-NEXT: j .LBB13_41
; RV32I-NEXT: .LBB13_39:
; RV32I-NEXT: srl s8, t2, a4
; RV32I-NEXT: srl a3, s0, a4
; RV32I-NEXT: sll a5, a0, s6
; RV32I-NEXT: or a3, a3, a5
-; RV32I-NEXT: .LBB13_40:
; RV32I-NEXT: mv a5, s0
-; RV32I-NEXT: beqz a4, .LBB13_42
-; RV32I-NEXT: # %bb.41:
+; RV32I-NEXT: beqz a4, .LBB13_41
+; RV32I-NEXT: .LBB13_40:
; RV32I-NEXT: mv a5, a3
-; RV32I-NEXT: .LBB13_42:
-; RV32I-NEXT: bltu a4, t3, .LBB13_45
-; RV32I-NEXT: # %bb.43:
+; RV32I-NEXT: .LBB13_41:
+; RV32I-NEXT: bltu a4, t3, .LBB13_44
+; RV32I-NEXT: # %bb.42:
; RV32I-NEXT: li s1, 0
-; RV32I-NEXT: bgeu s7, t3, .LBB13_46
-; RV32I-NEXT: .LBB13_44:
+; RV32I-NEXT: bgeu s7, t3, .LBB13_45
+; RV32I-NEXT: .LBB13_43:
; RV32I-NEXT: sll s3, t4, s6
; RV32I-NEXT: srl a3, t4, s10
; RV32I-NEXT: or a3, a3, ra
+; RV32I-NEXT: mv s10, t2
+; RV32I-NEXT: bnez s7, .LBB13_46
; RV32I-NEXT: j .LBB13_47
-; RV32I-NEXT: .LBB13_45:
+; RV32I-NEXT: .LBB13_44:
; RV32I-NEXT: srl s1, a0, a4
-; RV32I-NEXT: bltu s7, t3, .LBB13_44
-; RV32I-NEXT: .LBB13_46:
+; RV32I-NEXT: bltu s7, t3, .LBB13_43
+; RV32I-NEXT: .LBB13_45:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t4, s7
-; RV32I-NEXT: .LBB13_47:
; RV32I-NEXT: mv s10, t2
-; RV32I-NEXT: beqz s7, .LBB13_49
-; RV32I-NEXT: # %bb.48:
+; RV32I-NEXT: beqz s7, .LBB13_47
+; RV32I-NEXT: .LBB13_46:
; RV32I-NEXT: mv s10, a3
-; RV32I-NEXT: .LBB13_49:
-; RV32I-NEXT: bltu s9, t3, .LBB13_51
-; RV32I-NEXT: # %bb.50:
+; RV32I-NEXT: .LBB13_47:
+; RV32I-NEXT: bltu s9, t3, .LBB13_49
+; RV32I-NEXT: # %bb.48:
; RV32I-NEXT: srl a3, t2, s9
-; RV32I-NEXT: j .LBB13_52
-; RV32I-NEXT: .LBB13_51:
+; RV32I-NEXT: mv s2, t4
+; RV32I-NEXT: bnez s9, .LBB13_50
+; RV32I-NEXT: j .LBB13_51
+; RV32I-NEXT: .LBB13_49:
; RV32I-NEXT: sll a3, t2, s11
; RV32I-NEXT: or a3, s2, a3
-; RV32I-NEXT: .LBB13_52:
; RV32I-NEXT: mv s2, t4
-; RV32I-NEXT: beqz s9, .LBB13_54
-; RV32I-NEXT: # %bb.53:
+; RV32I-NEXT: beqz s9, .LBB13_51
+; RV32I-NEXT: .LBB13_50:
; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: .LBB13_54:
-; RV32I-NEXT: bltu s9, t3, .LBB13_56
-; RV32I-NEXT: # %bb.55:
+; RV32I-NEXT: .LBB13_51:
+; RV32I-NEXT: bltu s9, t3, .LBB13_53
+; RV32I-NEXT: # %bb.52:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: bltu a4, t6, .LBB13_57
-; RV32I-NEXT: j .LBB13_58
-; RV32I-NEXT: .LBB13_56:
+; RV32I-NEXT: bltu a4, t6, .LBB13_54
+; RV32I-NEXT: j .LBB13_55
+; RV32I-NEXT: .LBB13_53:
; RV32I-NEXT: srl s7, t2, a4
-; RV32I-NEXT: bgeu a4, t6, .LBB13_58
-; RV32I-NEXT: .LBB13_57:
+; RV32I-NEXT: bgeu a4, t6, .LBB13_55
+; RV32I-NEXT: .LBB13_54:
; RV32I-NEXT: or s2, a5, s3
; RV32I-NEXT: or s7, s1, s10
-; RV32I-NEXT: .LBB13_58:
+; RV32I-NEXT: .LBB13_55:
; RV32I-NEXT: li a3, 128
; RV32I-NEXT: mv a5, s0
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: beqz a4, .LBB13_60
-; RV32I-NEXT: # %bb.59:
+; RV32I-NEXT: beqz a4, .LBB13_57
+; RV32I-NEXT: # %bb.56:
; RV32I-NEXT: mv a5, s2
; RV32I-NEXT: mv s1, s7
-; RV32I-NEXT: .LBB13_60:
+; RV32I-NEXT: .LBB13_57:
; RV32I-NEXT: sw a5, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sub s2, a3, a4
-; RV32I-NEXT: bltu a4, t6, .LBB13_62
-; RV32I-NEXT: # %bb.61:
+; RV32I-NEXT: bltu a4, t6, .LBB13_59
+; RV32I-NEXT: # %bb.58:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: li s8, 0
-; RV32I-NEXT: .LBB13_62:
+; RV32I-NEXT: .LBB13_59:
; RV32I-NEXT: neg s3, s2
; RV32I-NEXT: srl a5, t0, s3
; RV32I-NEXT: sw s8, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s2, t3, .LBB13_64
-; RV32I-NEXT: # %bb.63:
+; RV32I-NEXT: bltu s2, t3, .LBB13_61
+; RV32I-NEXT: # %bb.60:
; RV32I-NEXT: li s10, 0
; RV32I-NEXT: sll a3, t0, s2
-; RV32I-NEXT: j .LBB13_65
-; RV32I-NEXT: .LBB13_64:
+; RV32I-NEXT: j .LBB13_62
+; RV32I-NEXT: .LBB13_61:
; RV32I-NEXT: sll s10, t0, s6
; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a5, a3
-; RV32I-NEXT: .LBB13_65:
+; RV32I-NEXT: .LBB13_62:
; RV32I-NEXT: sub s1, t6, s2
; RV32I-NEXT: mv s8, a6
-; RV32I-NEXT: beqz s2, .LBB13_67
-; RV32I-NEXT: # %bb.66:
+; RV32I-NEXT: beqz s2, .LBB13_64
+; RV32I-NEXT: # %bb.63:
; RV32I-NEXT: mv s8, a3
-; RV32I-NEXT: .LBB13_67:
-; RV32I-NEXT: bltu s1, t3, .LBB13_69
-; RV32I-NEXT: # %bb.68:
+; RV32I-NEXT: .LBB13_64:
+; RV32I-NEXT: bltu s1, t3, .LBB13_66
+; RV32I-NEXT: # %bb.65:
; RV32I-NEXT: srl a3, a6, s1
-; RV32I-NEXT: j .LBB13_70
-; RV32I-NEXT: .LBB13_69:
+; RV32I-NEXT: mv a5, t0
+; RV32I-NEXT: bnez s1, .LBB13_67
+; RV32I-NEXT: j .LBB13_68
+; RV32I-NEXT: .LBB13_66:
; RV32I-NEXT: neg a3, s1
; RV32I-NEXT: sll a3, a6, a3
; RV32I-NEXT: or a3, a5, a3
-; RV32I-NEXT: .LBB13_70:
; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: beqz s1, .LBB13_72
-; RV32I-NEXT: # %bb.71:
+; RV32I-NEXT: beqz s1, .LBB13_68
+; RV32I-NEXT: .LBB13_67:
; RV32I-NEXT: mv a5, a3
-; RV32I-NEXT: .LBB13_72:
-; RV32I-NEXT: bltu s1, t3, .LBB13_76
-; RV32I-NEXT: # %bb.73:
+; RV32I-NEXT: .LBB13_68:
+; RV32I-NEXT: bltu s1, t3, .LBB13_71
+; RV32I-NEXT: # %bb.69:
; RV32I-NEXT: li s1, 0
-; RV32I-NEXT: .LBB13_74:
; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bgeu s2, t3, .LBB13_77
-; RV32I-NEXT: # %bb.75:
+; RV32I-NEXT: bgeu s2, t3, .LBB13_72
+; RV32I-NEXT: .LBB13_70:
; RV32I-NEXT: sll s6, t1, s6
; RV32I-NEXT: srl a3, t1, s3
; RV32I-NEXT: lw s3, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: j .LBB13_78
-; RV32I-NEXT: .LBB13_76:
+; RV32I-NEXT: j .LBB13_73
+; RV32I-NEXT: .LBB13_71:
; RV32I-NEXT: srl s1, a6, s3
-; RV32I-NEXT: j .LBB13_74
-; RV32I-NEXT: .LBB13_77:
+; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: bltu s2, t3, .LBB13_70
+; RV32I-NEXT: .LBB13_72:
; RV32I-NEXT: li s6, 0
; RV32I-NEXT: sll a3, t1, s2
-; RV32I-NEXT: .LBB13_78:
+; RV32I-NEXT: .LBB13_73:
; RV32I-NEXT: addi s9, s2, -64
; RV32I-NEXT: mv s5, t5
-; RV32I-NEXT: beqz s2, .LBB13_80
-; RV32I-NEXT: # %bb.79:
+; RV32I-NEXT: beqz s2, .LBB13_75
+; RV32I-NEXT: # %bb.74:
; RV32I-NEXT: mv s5, a3
-; RV32I-NEXT: .LBB13_80:
-; RV32I-NEXT: bltu s9, t3, .LBB13_82
-; RV32I-NEXT: # %bb.81:
+; RV32I-NEXT: .LBB13_75:
+; RV32I-NEXT: bltu s9, t3, .LBB13_77
+; RV32I-NEXT: # %bb.76:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t0, s9
-; RV32I-NEXT: j .LBB13_83
-; RV32I-NEXT: .LBB13_82:
+; RV32I-NEXT: mv s7, a6
+; RV32I-NEXT: bnez s9, .LBB13_78
+; RV32I-NEXT: j .LBB13_79
+; RV32I-NEXT: .LBB13_77:
; RV32I-NEXT: sll s3, t0, s2
; RV32I-NEXT: neg a3, s9
; RV32I-NEXT: srl a3, t0, a3
; RV32I-NEXT: sll s7, a6, s2
; RV32I-NEXT: or a3, a3, s7
-; RV32I-NEXT: .LBB13_83:
; RV32I-NEXT: mv s7, a6
-; RV32I-NEXT: beqz s9, .LBB13_85
-; RV32I-NEXT: # %bb.84:
+; RV32I-NEXT: beqz s9, .LBB13_79
+; RV32I-NEXT: .LBB13_78:
; RV32I-NEXT: mv s7, a3
-; RV32I-NEXT: .LBB13_85:
-; RV32I-NEXT: bltu s2, t6, .LBB13_87
-; RV32I-NEXT: # %bb.86:
+; RV32I-NEXT: .LBB13_79:
+; RV32I-NEXT: bltu s2, t6, .LBB13_81
+; RV32I-NEXT: # %bb.80:
; RV32I-NEXT: sw zero, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: li s8, 0
-; RV32I-NEXT: j .LBB13_88
-; RV32I-NEXT: .LBB13_87:
+; RV32I-NEXT: j .LBB13_82
+; RV32I-NEXT: .LBB13_81:
; RV32I-NEXT: sw s10, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: or s3, a5, s6
; RV32I-NEXT: or s7, s1, s5
-; RV32I-NEXT: .LBB13_88:
+; RV32I-NEXT: .LBB13_82:
; RV32I-NEXT: addi ra, a4, -128
; RV32I-NEXT: mv s5, t1
; RV32I-NEXT: mv s6, t5
-; RV32I-NEXT: beqz s2, .LBB13_90
-; RV32I-NEXT: # %bb.89:
+; RV32I-NEXT: beqz s2, .LBB13_84
+; RV32I-NEXT: # %bb.83:
; RV32I-NEXT: mv s5, s3
; RV32I-NEXT: mv s6, s7
-; RV32I-NEXT: .LBB13_90:
+; RV32I-NEXT: .LBB13_84:
; RV32I-NEXT: neg s9, ra
; RV32I-NEXT: sll s3, t5, s9
-; RV32I-NEXT: bltu ra, t3, .LBB13_92
-; RV32I-NEXT: # %bb.91:
+; RV32I-NEXT: bltu ra, t3, .LBB13_86
+; RV32I-NEXT: # %bb.85:
; RV32I-NEXT: srl a3, t5, ra
-; RV32I-NEXT: j .LBB13_93
-; RV32I-NEXT: .LBB13_92:
+; RV32I-NEXT: mv s1, t1
+; RV32I-NEXT: bnez ra, .LBB13_87
+; RV32I-NEXT: j .LBB13_88
+; RV32I-NEXT: .LBB13_86:
; RV32I-NEXT: lw a3, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: .LBB13_93:
; RV32I-NEXT: mv s1, t1
-; RV32I-NEXT: beqz ra, .LBB13_95
-; RV32I-NEXT: # %bb.94:
+; RV32I-NEXT: beqz ra, .LBB13_88
+; RV32I-NEXT: .LBB13_87:
; RV32I-NEXT: mv s1, a3
-; RV32I-NEXT: .LBB13_95:
-; RV32I-NEXT: bltu ra, t3, .LBB13_97
-; RV32I-NEXT: # %bb.96:
+; RV32I-NEXT: .LBB13_88:
+; RV32I-NEXT: bltu ra, t3, .LBB13_90
+; RV32I-NEXT: # %bb.89:
; RV32I-NEXT: li s2, 0
; RV32I-NEXT: srl a3, a6, ra
-; RV32I-NEXT: j .LBB13_98
-; RV32I-NEXT: .LBB13_97:
+; RV32I-NEXT: mv a5, t0
+; RV32I-NEXT: bnez ra, .LBB13_91
+; RV32I-NEXT: j .LBB13_92
+; RV32I-NEXT: .LBB13_90:
; RV32I-NEXT: srl s2, t5, a4
; RV32I-NEXT: sll a3, a6, s9
; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a5, a3
-; RV32I-NEXT: .LBB13_98:
; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: beqz ra, .LBB13_100
-; RV32I-NEXT: # %bb.99:
+; RV32I-NEXT: beqz ra, .LBB13_92
+; RV32I-NEXT: .LBB13_91:
; RV32I-NEXT: mv a5, a3
-; RV32I-NEXT: .LBB13_100:
+; RV32I-NEXT: .LBB13_92:
; RV32I-NEXT: sub s10, t6, ra
-; RV32I-NEXT: bltu ra, t3, .LBB13_103
-; RV32I-NEXT: # %bb.101:
+; RV32I-NEXT: bltu ra, t3, .LBB13_95
+; RV32I-NEXT: # %bb.93:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: bgeu s10, t3, .LBB13_104
-; RV32I-NEXT: .LBB13_102:
+; RV32I-NEXT: bgeu s10, t3, .LBB13_96
+; RV32I-NEXT: .LBB13_94:
; RV32I-NEXT: sll s9, t1, s9
; RV32I-NEXT: neg a3, s10
; RV32I-NEXT: srl a3, t1, a3
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: j .LBB13_105
-; RV32I-NEXT: .LBB13_103:
+; RV32I-NEXT: j .LBB13_97
+; RV32I-NEXT: .LBB13_95:
; RV32I-NEXT: srl s7, a6, a4
-; RV32I-NEXT: bltu s10, t3, .LBB13_102
-; RV32I-NEXT: .LBB13_104:
+; RV32I-NEXT: bltu s10, t3, .LBB13_94
+; RV32I-NEXT: .LBB13_96:
; RV32I-NEXT: li s9, 0
; RV32I-NEXT: sll a3, t1, s10
-; RV32I-NEXT: .LBB13_105:
+; RV32I-NEXT: .LBB13_97:
; RV32I-NEXT: addi s11, ra, -64
; RV32I-NEXT: mv s3, t5
-; RV32I-NEXT: beqz s10, .LBB13_107
-; RV32I-NEXT: # %bb.106:
+; RV32I-NEXT: beqz s10, .LBB13_99
+; RV32I-NEXT: # %bb.98:
; RV32I-NEXT: mv s3, a3
-; RV32I-NEXT: .LBB13_107:
-; RV32I-NEXT: bltu s11, t3, .LBB13_109
-; RV32I-NEXT: # %bb.108:
+; RV32I-NEXT: .LBB13_99:
+; RV32I-NEXT: bltu s11, t3, .LBB13_101
+; RV32I-NEXT: # %bb.100:
; RV32I-NEXT: srl a3, t5, s11
-; RV32I-NEXT: bnez s11, .LBB13_110
-; RV32I-NEXT: j .LBB13_111
-; RV32I-NEXT: .LBB13_109:
+; RV32I-NEXT: bnez s11, .LBB13_102
+; RV32I-NEXT: j .LBB13_103
+; RV32I-NEXT: .LBB13_101:
; RV32I-NEXT: srl a3, t1, ra
; RV32I-NEXT: neg s10, s11
; RV32I-NEXT: sll s10, t5, s10
; RV32I-NEXT: or a3, a3, s10
-; RV32I-NEXT: beqz s11, .LBB13_111
-; RV32I-NEXT: .LBB13_110:
+; RV32I-NEXT: beqz s11, .LBB13_103
+; RV32I-NEXT: .LBB13_102:
; RV32I-NEXT: mv t1, a3
-; RV32I-NEXT: .LBB13_111:
-; RV32I-NEXT: bltu s11, t3, .LBB13_113
-; RV32I-NEXT: # %bb.112:
+; RV32I-NEXT: .LBB13_103:
+; RV32I-NEXT: bltu s11, t3, .LBB13_105
+; RV32I-NEXT: # %bb.104:
; RV32I-NEXT: li t3, 0
-; RV32I-NEXT: bltu ra, t6, .LBB13_114
-; RV32I-NEXT: j .LBB13_115
-; RV32I-NEXT: .LBB13_113:
+; RV32I-NEXT: bltu ra, t6, .LBB13_106
+; RV32I-NEXT: j .LBB13_107
+; RV32I-NEXT: .LBB13_105:
; RV32I-NEXT: srl t3, t5, ra
-; RV32I-NEXT: bgeu ra, t6, .LBB13_115
-; RV32I-NEXT: .LBB13_114:
+; RV32I-NEXT: bgeu ra, t6, .LBB13_107
+; RV32I-NEXT: .LBB13_106:
; RV32I-NEXT: or t1, a5, s9
; RV32I-NEXT: or t3, s7, s3
-; RV32I-NEXT: .LBB13_115:
+; RV32I-NEXT: .LBB13_107:
; RV32I-NEXT: li a5, 128
-; RV32I-NEXT: bnez ra, .LBB13_122
-; RV32I-NEXT: # %bb.116:
-; RV32I-NEXT: bgeu ra, t6, .LBB13_123
-; RV32I-NEXT: .LBB13_117:
-; RV32I-NEXT: bltu a4, a5, .LBB13_124
-; RV32I-NEXT: .LBB13_118:
+; RV32I-NEXT: bnez ra, .LBB13_114
+; RV32I-NEXT: # %bb.108:
+; RV32I-NEXT: bgeu ra, t6, .LBB13_115
+; RV32I-NEXT: .LBB13_109:
+; RV32I-NEXT: bltu a4, a5, .LBB13_116
+; RV32I-NEXT: .LBB13_110:
; RV32I-NEXT: lw ra, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: bnez a4, .LBB13_125
-; RV32I-NEXT: # %bb.119:
-; RV32I-NEXT: bltu a4, a5, .LBB13_121
-; RV32I-NEXT: .LBB13_120:
+; RV32I-NEXT: bnez a4, .LBB13_117
+; RV32I-NEXT: .LBB13_111:
+; RV32I-NEXT: bltu a4, a5, .LBB13_113
+; RV32I-NEXT: .LBB13_112:
; RV32I-NEXT: li s4, 0
; RV32I-NEXT: li a7, 0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: li ra, 0
-; RV32I-NEXT: .LBB13_121:
+; RV32I-NEXT: .LBB13_113:
; RV32I-NEXT: srli a4, s0, 16
; RV32I-NEXT: lui t1, 16
; RV32I-NEXT: srli t0, s0, 24
@@ -4472,15 +4487,15 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 96
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB13_122:
+; RV32I-NEXT: .LBB13_114:
; RV32I-NEXT: mv t0, t1
; RV32I-NEXT: mv a6, t3
-; RV32I-NEXT: bltu ra, t6, .LBB13_117
-; RV32I-NEXT: .LBB13_123:
+; RV32I-NEXT: bltu ra, t6, .LBB13_109
+; RV32I-NEXT: .LBB13_115:
; RV32I-NEXT: li s1, 0
; RV32I-NEXT: li s2, 0
-; RV32I-NEXT: bgeu a4, a5, .LBB13_118
-; RV32I-NEXT: .LBB13_124:
+; RV32I-NEXT: bgeu a4, a5, .LBB13_110
+; RV32I-NEXT: .LBB13_116:
; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a6, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or t0, a3, a6
@@ -4490,14 +4505,15 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: or s1, a3, s5
; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s2, a3, s6
-; RV32I-NEXT: j .LBB13_118
-; RV32I-NEXT: .LBB13_125:
+; RV32I-NEXT: lw ra, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: beqz a4, .LBB13_111
+; RV32I-NEXT: .LBB13_117:
; RV32I-NEXT: mv s0, t0
; RV32I-NEXT: mv a0, a6
; RV32I-NEXT: mv t4, s1
; RV32I-NEXT: mv t2, s2
-; RV32I-NEXT: bgeu a4, a5, .LBB13_120
-; RV32I-NEXT: j .LBB13_121
+; RV32I-NEXT: bgeu a4, a5, .LBB13_112
+; RV32I-NEXT: j .LBB13_113
%src = load i256, ptr %src.ptr, align 1
%wordOff = load i256, ptr %wordOff.ptr, align 1
%bitOff = shl i256 %wordOff, 5
@@ -5042,271 +5058,278 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV32I-NEXT: # %bb.38:
; RV32I-NEXT: li s8, 0
; RV32I-NEXT: srl a3, a0, a4
-; RV32I-NEXT: j .LBB14_40
+; RV32I-NEXT: mv a5, s0
+; RV32I-NEXT: bnez a4, .LBB14_40
+; RV32I-NEXT: j .LBB14_41
; RV32I-NEXT: .LBB14_39:
; RV32I-NEXT: srl s8, t2, a4
; RV32I-NEXT: srl a3, s0, a4
; RV32I-NEXT: sll a5, a0, s6
; RV32I-NEXT: or a3, a3, a5
-; RV32I-NEXT: .LBB14_40:
; RV32I-NEXT: mv a5, s0
-; RV32I-NEXT: beqz a4, .LBB14_42
-; RV32I-NEXT: # %bb.41:
+; RV32I-NEXT: beqz a4, .LBB14_41
+; RV32I-NEXT: .LBB14_40:
; RV32I-NEXT: mv a5, a3
-; RV32I-NEXT: .LBB14_42:
-; RV32I-NEXT: bltu a4, t3, .LBB14_45
-; RV32I-NEXT: # %bb.43:
+; RV32I-NEXT: .LBB14_41:
+; RV32I-NEXT: bltu a4, t3, .LBB14_44
+; RV32I-NEXT: # %bb.42:
; RV32I-NEXT: li s1, 0
-; RV32I-NEXT: bgeu s7, t3, .LBB14_46
-; RV32I-NEXT: .LBB14_44:
+; RV32I-NEXT: bgeu s7, t3, .LBB14_45
+; RV32I-NEXT: .LBB14_43:
; RV32I-NEXT: sll s3, t4, s6
; RV32I-NEXT: srl a3, t4, s10
; RV32I-NEXT: or a3, a3, ra
+; RV32I-NEXT: mv s10, t2
+; RV32I-NEXT: bnez s7, .LBB14_46
; RV32I-NEXT: j .LBB14_47
-; RV32I-NEXT: .LBB14_45:
+; RV32I-NEXT: .LBB14_44:
; RV32I-NEXT: srl s1, a0, a4
-; RV32I-NEXT: bltu s7, t3, .LBB14_44
-; RV32I-NEXT: .LBB14_46:
+; RV32I-NEXT: bltu s7, t3, .LBB14_43
+; RV32I-NEXT: .LBB14_45:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t4, s7
-; RV32I-NEXT: .LBB14_47:
; RV32I-NEXT: mv s10, t2
-; RV32I-NEXT: beqz s7, .LBB14_49
-; RV32I-NEXT: # %bb.48:
+; RV32I-NEXT: beqz s7, .LBB14_47
+; RV32I-NEXT: .LBB14_46:
; RV32I-NEXT: mv s10, a3
-; RV32I-NEXT: .LBB14_49:
-; RV32I-NEXT: bltu s9, t3, .LBB14_51
-; RV32I-NEXT: # %bb.50:
+; RV32I-NEXT: .LBB14_47:
+; RV32I-NEXT: bltu s9, t3, .LBB14_49
+; RV32I-NEXT: # %bb.48:
; RV32I-NEXT: srl a3, t2, s9
-; RV32I-NEXT: j .LBB14_52
-; RV32I-NEXT: .LBB14_51:
+; RV32I-NEXT: mv s2, t4
+; RV32I-NEXT: bnez s9, .LBB14_50
+; RV32I-NEXT: j .LBB14_51
+; RV32I-NEXT: .LBB14_49:
; RV32I-NEXT: sll a3, t2, s11
; RV32I-NEXT: or a3, s2, a3
-; RV32I-NEXT: .LBB14_52:
; RV32I-NEXT: mv s2, t4
-; RV32I-NEXT: beqz s9, .LBB14_54
-; RV32I-NEXT: # %bb.53:
+; RV32I-NEXT: beqz s9, .LBB14_51
+; RV32I-NEXT: .LBB14_50:
; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: .LBB14_54:
-; RV32I-NEXT: bltu s9, t3, .LBB14_56
-; RV32I-NEXT: # %bb.55:
+; RV32I-NEXT: .LBB14_51:
+; RV32I-NEXT: bltu s9, t3, .LBB14_53
+; RV32I-NEXT: # %bb.52:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: bltu a4, t6, .LBB14_57
-; RV32I-NEXT: j .LBB14_58
-; RV32I-NEXT: .LBB14_56:
+; RV32I-NEXT: bltu a4, t6, .LBB14_54
+; RV32I-NEXT: j .LBB14_55
+; RV32I-NEXT: .LBB14_53:
; RV32I-NEXT: srl s7, t2, a4
-; RV32I-NEXT: bgeu a4, t6, .LBB14_58
-; RV32I-NEXT: .LBB14_57:
+; RV32I-NEXT: bgeu a4, t6, .LBB14_55
+; RV32I-NEXT: .LBB14_54:
; RV32I-NEXT: or s2, a5, s3
; RV32I-NEXT: or s7, s1, s10
-; RV32I-NEXT: .LBB14_58:
+; RV32I-NEXT: .LBB14_55:
; RV32I-NEXT: li a3, 128
; RV32I-NEXT: mv a5, s0
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: beqz a4, .LBB14_60
-; RV32I-NEXT: # %bb.59:
+; RV32I-NEXT: beqz a4, .LBB14_57
+; RV32I-NEXT: # %bb.56:
; RV32I-NEXT: mv a5, s2
; RV32I-NEXT: mv s1, s7
-; RV32I-NEXT: .LBB14_60:
+; RV32I-NEXT: .LBB14_57:
; RV32I-NEXT: sw a5, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sub s2, a3, a4
-; RV32I-NEXT: bltu a4, t6, .LBB14_62
-; RV32I-NEXT: # %bb.61:
+; RV32I-NEXT: bltu a4, t6, .LBB14_59
+; RV32I-NEXT: # %bb.58:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: li s8, 0
-; RV32I-NEXT: .LBB14_62:
+; RV32I-NEXT: .LBB14_59:
; RV32I-NEXT: neg s3, s2
; RV32I-NEXT: srl a5, t0, s3
; RV32I-NEXT: sw s8, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s2, t3, .LBB14_64
-; RV32I-NEXT: # %bb.63:
+; RV32I-NEXT: bltu s2, t3, .LBB14_61
+; RV32I-NEXT: # %bb.60:
; RV32I-NEXT: li s10, 0
; RV32I-NEXT: sll a3, t0, s2
-; RV32I-NEXT: j .LBB14_65
-; RV32I-NEXT: .LBB14_64:
+; RV32I-NEXT: j .LBB14_62
+; RV32I-NEXT: .LBB14_61:
; RV32I-NEXT: sll s10, t0, s6
; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a5, a3
-; RV32I-NEXT: .LBB14_65:
+; RV32I-NEXT: .LBB14_62:
; RV32I-NEXT: sub s1, t6, s2
; RV32I-NEXT: mv s8, a6
-; RV32I-NEXT: beqz s2, .LBB14_67
-; RV32I-NEXT: # %bb.66:
+; RV32I-NEXT: beqz s2, .LBB14_64
+; RV32I-NEXT: # %bb.63:
; RV32I-NEXT: mv s8, a3
-; RV32I-NEXT: .LBB14_67:
-; RV32I-NEXT: bltu s1, t3, .LBB14_69
-; RV32I-NEXT: # %bb.68:
+; RV32I-NEXT: .LBB14_64:
+; RV32I-NEXT: bltu s1, t3, .LBB14_66
+; RV32I-NEXT: # %bb.65:
; RV32I-NEXT: srl a3, a6, s1
-; RV32I-NEXT: j .LBB14_70
-; RV32I-NEXT: .LBB14_69:
+; RV32I-NEXT: mv a5, t0
+; RV32I-NEXT: bnez s1, .LBB14_67
+; RV32I-NEXT: j .LBB14_68
+; RV32I-NEXT: .LBB14_66:
; RV32I-NEXT: neg a3, s1
; RV32I-NEXT: sll a3, a6, a3
; RV32I-NEXT: or a3, a5, a3
-; RV32I-NEXT: .LBB14_70:
; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: beqz s1, .LBB14_72
-; RV32I-NEXT: # %bb.71:
+; RV32I-NEXT: beqz s1, .LBB14_68
+; RV32I-NEXT: .LBB14_67:
; RV32I-NEXT: mv a5, a3
-; RV32I-NEXT: .LBB14_72:
-; RV32I-NEXT: bltu s1, t3, .LBB14_76
-; RV32I-NEXT: # %bb.73:
+; RV32I-NEXT: .LBB14_68:
+; RV32I-NEXT: bltu s1, t3, .LBB14_71
+; RV32I-NEXT: # %bb.69:
; RV32I-NEXT: li s1, 0
-; RV32I-NEXT: .LBB14_74:
; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bgeu s2, t3, .LBB14_77
-; RV32I-NEXT: # %bb.75:
+; RV32I-NEXT: bgeu s2, t3, .LBB14_72
+; RV32I-NEXT: .LBB14_70:
; RV32I-NEXT: sll s6, t1, s6
; RV32I-NEXT: srl a3, t1, s3
; RV32I-NEXT: lw s3, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: j .LBB14_78
-; RV32I-NEXT: .LBB14_76:
+; RV32I-NEXT: j .LBB14_73
+; RV32I-NEXT: .LBB14_71:
; RV32I-NEXT: srl s1, a6, s3
-; RV32I-NEXT: j .LBB14_74
-; RV32I-NEXT: .LBB14_77:
+; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: bltu s2, t3, .LBB14_70
+; RV32I-NEXT: .LBB14_72:
; RV32I-NEXT: li s6, 0
; RV32I-NEXT: sll a3, t1, s2
-; RV32I-NEXT: .LBB14_78:
+; RV32I-NEXT: .LBB14_73:
; RV32I-NEXT: addi s9, s2, -64
; RV32I-NEXT: mv s5, t5
-; RV32I-NEXT: beqz s2, .LBB14_80
-; RV32I-NEXT: # %bb.79:
+; RV32I-NEXT: beqz s2, .LBB14_75
+; RV32I-NEXT: # %bb.74:
; RV32I-NEXT: mv s5, a3
-; RV32I-NEXT: .LBB14_80:
-; RV32I-NEXT: bltu s9, t3, .LBB14_82
-; RV32I-NEXT: # %bb.81:
+; RV32I-NEXT: .LBB14_75:
+; RV32I-NEXT: bltu s9, t3, .LBB14_77
+; RV32I-NEXT: # %bb.76:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t0, s9
-; RV32I-NEXT: j .LBB14_83
-; RV32I-NEXT: .LBB14_82:
+; RV32I-NEXT: mv s7, a6
+; RV32I-NEXT: bnez s9, .LBB14_78
+; RV32I-NEXT: j .LBB14_79
+; RV32I-NEXT: .LBB14_77:
; RV32I-NEXT: sll s3, t0, s2
; RV32I-NEXT: neg a3, s9
; RV32I-NEXT: srl a3, t0, a3
; RV32I-NEXT: sll s7, a6, s2
; RV32I-NEXT: or a3, a3, s7
-; RV32I-NEXT: .LBB14_83:
; RV32I-NEXT: mv s7, a6
-; RV32I-NEXT: beqz s9, .LBB14_85
-; RV32I-NEXT: # %bb.84:
+; RV32I-NEXT: beqz s9, .LBB14_79
+; RV32I-NEXT: .LBB14_78:
; RV32I-NEXT: mv s7, a3
-; RV32I-NEXT: .LBB14_85:
-; RV32I-NEXT: bltu s2, t6, .LBB14_87
-; RV32I-NEXT: # %bb.86:
+; RV32I-NEXT: .LBB14_79:
+; RV32I-NEXT: bltu s2, t6, .LBB14_81
+; RV32I-NEXT: # %bb.80:
; RV32I-NEXT: sw zero, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: li s8, 0
-; RV32I-NEXT: j .LBB14_88
-; RV32I-NEXT: .LBB14_87:
+; RV32I-NEXT: j .LBB14_82
+; RV32I-NEXT: .LBB14_81:
; RV32I-NEXT: sw s10, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: or s3, a5, s6
; RV32I-NEXT: or s7, s1, s5
-; RV32I-NEXT: .LBB14_88:
+; RV32I-NEXT: .LBB14_82:
; RV32I-NEXT: addi ra, a4, -128
; RV32I-NEXT: mv s5, t1
; RV32I-NEXT: mv s6, t5
-; RV32I-NEXT: beqz s2, .LBB14_90
-; RV32I-NEXT: # %bb.89:
+; RV32I-NEXT: beqz s2, .LBB14_84
+; RV32I-NEXT: # %bb.83:
; RV32I-NEXT: mv s5, s3
; RV32I-NEXT: mv s6, s7
-; RV32I-NEXT: .LBB14_90:
+; RV32I-NEXT: .LBB14_84:
; RV32I-NEXT: neg s9, ra
; RV32I-NEXT: sll s3, t5, s9
-; RV32I-NEXT: bltu ra, t3, .LBB14_92
-; RV32I-NEXT: # %bb.91:
+; RV32I-NEXT: bltu ra, t3, .LBB14_86
+; RV32I-NEXT: # %bb.85:
; RV32I-NEXT: srl a3, t5, ra
-; RV32I-NEXT: j .LBB14_93
-; RV32I-NEXT: .LBB14_92:
+; RV32I-NEXT: mv s1, t1
+; RV32I-NEXT: bnez ra, .LBB14_87
+; RV32I-NEXT: j .LBB14_88
+; RV32I-NEXT: .LBB14_86:
; RV32I-NEXT: lw a3, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: .LBB14_93:
; RV32I-NEXT: mv s1, t1
-; RV32I-NEXT: beqz ra, .LBB14_95
-; RV32I-NEXT: # %bb.94:
+; RV32I-NEXT: beqz ra, .LBB14_88
+; RV32I-NEXT: .LBB14_87:
; RV32I-NEXT: mv s1, a3
-; RV32I-NEXT: .LBB14_95:
-; RV32I-NEXT: bltu ra, t3, .LBB14_97
-; RV32I-NEXT: # %bb.96:
+; RV32I-NEXT: .LBB14_88:
+; RV32I-NEXT: bltu ra, t3, .LBB14_90
+; RV32I-NEXT: # %bb.89:
; RV32I-NEXT: li s2, 0
; RV32I-NEXT: srl a3, a6, ra
-; RV32I-NEXT: j .LBB14_98
-; RV32I-NEXT: .LBB14_97:
+; RV32I-NEXT: mv a5, t0
+; RV32I-NEXT: bnez ra, .LBB14_91
+; RV32I-NEXT: j .LBB14_92
+; RV32I-NEXT: .LBB14_90:
; RV32I-NEXT: srl s2, t5, a4
; RV32I-NEXT: sll a3, a6, s9
; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a5, a3
-; RV32I-NEXT: .LBB14_98:
; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: beqz ra, .LBB14_100
-; RV32I-NEXT: # %bb.99:
+; RV32I-NEXT: beqz ra, .LBB14_92
+; RV32I-NEXT: .LBB14_91:
; RV32I-NEXT: mv a5, a3
-; RV32I-NEXT: .LBB14_100:
+; RV32I-NEXT: .LBB14_92:
; RV32I-NEXT: sub s10, t6, ra
-; RV32I-NEXT: bltu ra, t3, .LBB14_103
-; RV32I-NEXT: # %bb.101:
+; RV32I-NEXT: bltu ra, t3, .LBB14_95
+; RV32I-NEXT: # %bb.93:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: bgeu s10, t3, .LBB14_104
-; RV32I-NEXT: .LBB14_102:
+; RV32I-NEXT: bgeu s10, t3, .LBB14_96
+; RV32I-NEXT: .LBB14_94:
; RV32I-NEXT: sll s9, t1, s9
; RV32I-NEXT: neg a3, s10
; RV32I-NEXT: srl a3, t1, a3
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: j .LBB14_105
-; RV32I-NEXT: .LBB14_103:
+; RV32I-NEXT: j .LBB14_97
+; RV32I-NEXT: .LBB14_95:
; RV32I-NEXT: srl s7, a6, a4
-; RV32I-NEXT: bltu s10, t3, .LBB14_102
-; RV32I-NEXT: .LBB14_104:
+; RV32I-NEXT: bltu s10, t3, .LBB14_94
+; RV32I-NEXT: .LBB14_96:
; RV32I-NEXT: li s9, 0
; RV32I-NEXT: sll a3, t1, s10
-; RV32I-NEXT: .LBB14_105:
+; RV32I-NEXT: .LBB14_97:
; RV32I-NEXT: addi s11, ra, -64
; RV32I-NEXT: mv s3, t5
-; RV32I-NEXT: beqz s10, .LBB14_107
-; RV32I-NEXT: # %bb.106:
+; RV32I-NEXT: beqz s10, .LBB14_99
+; RV32I-NEXT: # %bb.98:
; RV32I-NEXT: mv s3, a3
-; RV32I-NEXT: .LBB14_107:
-; RV32I-NEXT: bltu s11, t3, .LBB14_109
-; RV32I-NEXT: # %bb.108:
+; RV32I-NEXT: .LBB14_99:
+; RV32I-NEXT: bltu s11, t3, .LBB14_101
+; RV32I-NEXT: # %bb.100:
; RV32I-NEXT: srl a3, t5, s11
-; RV32I-NEXT: bnez s11, .LBB14_110
-; RV32I-NEXT: j .LBB14_111
-; RV32I-NEXT: .LBB14_109:
+; RV32I-NEXT: bnez s11, .LBB14_102
+; RV32I-NEXT: j .LBB14_103
+; RV32I-NEXT: .LBB14_101:
; RV32I-NEXT: srl a3, t1, ra
; RV32I-NEXT: neg s10, s11
; RV32I-NEXT: sll s10, t5, s10
; RV32I-NEXT: or a3, a3, s10
-; RV32I-NEXT: beqz s11, .LBB14_111
-; RV32I-NEXT: .LBB14_110:
+; RV32I-NEXT: beqz s11, .LBB14_103
+; RV32I-NEXT: .LBB14_102:
; RV32I-NEXT: mv t1, a3
-; RV32I-NEXT: .LBB14_111:
-; RV32I-NEXT: bltu s11, t3, .LBB14_113
-; RV32I-NEXT: # %bb.112:
+; RV32I-NEXT: .LBB14_103:
+; RV32I-NEXT: bltu s11, t3, .LBB14_105
+; RV32I-NEXT: # %bb.104:
; RV32I-NEXT: li t3, 0
-; RV32I-NEXT: bltu ra, t6, .LBB14_114
-; RV32I-NEXT: j .LBB14_115
-; RV32I-NEXT: .LBB14_113:
+; RV32I-NEXT: bltu ra, t6, .LBB14_106
+; RV32I-NEXT: j .LBB14_107
+; RV32I-NEXT: .LBB14_105:
; RV32I-NEXT: srl t3, t5, ra
-; RV32I-NEXT: bgeu ra, t6, .LBB14_115
-; RV32I-NEXT: .LBB14_114:
+; RV32I-NEXT: bgeu ra, t6, .LBB14_107
+; RV32I-NEXT: .LBB14_106:
; RV32I-NEXT: or t1, a5, s9
; RV32I-NEXT: or t3, s7, s3
-; RV32I-NEXT: .LBB14_115:
+; RV32I-NEXT: .LBB14_107:
; RV32I-NEXT: li a5, 128
-; RV32I-NEXT: bnez ra, .LBB14_122
-; RV32I-NEXT: # %bb.116:
-; RV32I-NEXT: bgeu ra, t6, .LBB14_123
-; RV32I-NEXT: .LBB14_117:
-; RV32I-NEXT: bltu a4, a5, .LBB14_124
-; RV32I-NEXT: .LBB14_118:
+; RV32I-NEXT: bnez ra, .LBB14_114
+; RV32I-NEXT: # %bb.108:
+; RV32I-NEXT: bgeu ra, t6, .LBB14_115
+; RV32I-NEXT: .LBB14_109:
+; RV32I-NEXT: bltu a4, a5, .LBB14_116
+; RV32I-NEXT: .LBB14_110:
; RV32I-NEXT: lw ra, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: bnez a4, .LBB14_125
-; RV32I-NEXT: # %bb.119:
-; RV32I-NEXT: bltu a4, a5, .LBB14_121
-; RV32I-NEXT: .LBB14_120:
+; RV32I-NEXT: bnez a4, .LBB14_117
+; RV32I-NEXT: .LBB14_111:
+; RV32I-NEXT: bltu a4, a5, .LBB14_113
+; RV32I-NEXT: .LBB14_112:
; RV32I-NEXT: li s4, 0
; RV32I-NEXT: li a7, 0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: li ra, 0
-; RV32I-NEXT: .LBB14_121:
+; RV32I-NEXT: .LBB14_113:
; RV32I-NEXT: srli a4, s0, 16
; RV32I-NEXT: lui t1, 16
; RV32I-NEXT: srli t0, s0, 24
@@ -5388,15 +5411,15 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 96
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB14_122:
+; RV32I-NEXT: .LBB14_114:
; RV32I-NEXT: mv t0, t1
; RV32I-NEXT: mv a6, t3
-; RV32I-NEXT: bltu ra, t6, .LBB14_117
-; RV32I-NEXT: .LBB14_123:
+; RV32I-NEXT: bltu ra, t6, .LBB14_109
+; RV32I-NEXT: .LBB14_115:
; RV32I-NEXT: li s1, 0
; RV32I-NEXT: li s2, 0
-; RV32I-NEXT: bgeu a4, a5, .LBB14_118
-; RV32I-NEXT: .LBB14_124:
+; RV32I-NEXT: bgeu a4, a5, .LBB14_110
+; RV32I-NEXT: .LBB14_116:
; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a6, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or t0, a3, a6
@@ -5406,14 +5429,15 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV32I-NEXT: or s1, a3, s5
; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s2, a3, s6
-; RV32I-NEXT: j .LBB14_118
-; RV32I-NEXT: .LBB14_125:
+; RV32I-NEXT: lw ra, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: beqz a4, .LBB14_111
+; RV32I-NEXT: .LBB14_117:
; RV32I-NEXT: mv s0, t0
; RV32I-NEXT: mv a0, a6
; RV32I-NEXT: mv t4, s1
; RV32I-NEXT: mv t2, s2
-; RV32I-NEXT: bgeu a4, a5, .LBB14_120
-; RV32I-NEXT: j .LBB14_121
+; RV32I-NEXT: bgeu a4, a5, .LBB14_112
+; RV32I-NEXT: j .LBB14_113
%src = load i256, ptr %src.ptr, align 1
%dwordOff = load i256, ptr %dwordOff.ptr, align 1
%bitOff = shl i256 %dwordOff, 6
@@ -5860,112 +5884,115 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: # %bb.19:
; RV32I-NEXT: li s2, 0
; RV32I-NEXT: sll a1, t3, s7
-; RV32I-NEXT: j .LBB15_21
+; RV32I-NEXT: mv s4, a5
+; RV32I-NEXT: bnez s7, .LBB15_21
+; RV32I-NEXT: j .LBB15_22
; RV32I-NEXT: .LBB15_20:
; RV32I-NEXT: sll s2, t3, a4
; RV32I-NEXT: srl a1, t3, a1
; RV32I-NEXT: or a1, a1, s5
-; RV32I-NEXT: .LBB15_21:
; RV32I-NEXT: mv s4, a5
-; RV32I-NEXT: beqz s7, .LBB15_23
-; RV32I-NEXT: # %bb.22:
+; RV32I-NEXT: beqz s7, .LBB15_22
+; RV32I-NEXT: .LBB15_21:
; RV32I-NEXT: mv s4, a1
-; RV32I-NEXT: .LBB15_23:
+; RV32I-NEXT: .LBB15_22:
; RV32I-NEXT: li a1, 128
-; RV32I-NEXT: bltu a4, s9, .LBB15_25
-; RV32I-NEXT: # %bb.24:
+; RV32I-NEXT: bltu a4, s9, .LBB15_24
+; RV32I-NEXT: # %bb.23:
; RV32I-NEXT: sw zero, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: li a3, 0
-; RV32I-NEXT: j .LBB15_26
-; RV32I-NEXT: .LBB15_25:
+; RV32I-NEXT: j .LBB15_25
+; RV32I-NEXT: .LBB15_24:
; RV32I-NEXT: sw s8, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: or s2, a6, s1
; RV32I-NEXT: or s4, a7, s3
-; RV32I-NEXT: .LBB15_26:
+; RV32I-NEXT: .LBB15_25:
; RV32I-NEXT: sub ra, a1, a4
; RV32I-NEXT: mv a7, t1
; RV32I-NEXT: mv a6, t2
-; RV32I-NEXT: beqz a4, .LBB15_28
-; RV32I-NEXT: # %bb.27:
+; RV32I-NEXT: beqz a4, .LBB15_27
+; RV32I-NEXT: # %bb.26:
; RV32I-NEXT: mv a7, s2
; RV32I-NEXT: mv a6, s4
-; RV32I-NEXT: .LBB15_28:
+; RV32I-NEXT: .LBB15_27:
; RV32I-NEXT: neg s1, ra
; RV32I-NEXT: sll s2, t2, s1
-; RV32I-NEXT: bltu ra, t4, .LBB15_30
-; RV32I-NEXT: # %bb.29:
+; RV32I-NEXT: bltu ra, t4, .LBB15_29
+; RV32I-NEXT: # %bb.28:
; RV32I-NEXT: srl a1, t2, ra
+; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: bnez ra, .LBB15_30
; RV32I-NEXT: j .LBB15_31
-; RV32I-NEXT: .LBB15_30:
+; RV32I-NEXT: .LBB15_29:
; RV32I-NEXT: or a1, s0, s2
-; RV32I-NEXT: .LBB15_31:
; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: beqz ra, .LBB15_33
-; RV32I-NEXT: # %bb.32:
+; RV32I-NEXT: beqz ra, .LBB15_31
+; RV32I-NEXT: .LBB15_30:
; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .LBB15_33:
-; RV32I-NEXT: bltu ra, t4, .LBB15_35
-; RV32I-NEXT: # %bb.34:
+; RV32I-NEXT: .LBB15_31:
+; RV32I-NEXT: bltu ra, t4, .LBB15_33
+; RV32I-NEXT: # %bb.32:
; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT: srl a1, a5, ra
-; RV32I-NEXT: j .LBB15_36
-; RV32I-NEXT: .LBB15_35:
+; RV32I-NEXT: mv t5, t3
+; RV32I-NEXT: bnez ra, .LBB15_34
+; RV32I-NEXT: j .LBB15_35
+; RV32I-NEXT: .LBB15_33:
; RV32I-NEXT: srl a1, t2, s10
; RV32I-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT: sll a1, a5, s1
; RV32I-NEXT: or a1, t5, a1
-; RV32I-NEXT: .LBB15_36:
; RV32I-NEXT: mv t5, t3
-; RV32I-NEXT: beqz ra, .LBB15_38
-; RV32I-NEXT: # %bb.37:
+; RV32I-NEXT: beqz ra, .LBB15_35
+; RV32I-NEXT: .LBB15_34:
; RV32I-NEXT: mv t5, a1
-; RV32I-NEXT: .LBB15_38:
+; RV32I-NEXT: .LBB15_35:
; RV32I-NEXT: sub s3, s9, ra
-; RV32I-NEXT: bltu ra, t4, .LBB15_41
-; RV32I-NEXT: # %bb.39:
+; RV32I-NEXT: bltu ra, t4, .LBB15_38
+; RV32I-NEXT: # %bb.36:
; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bgeu s3, t4, .LBB15_42
-; RV32I-NEXT: .LBB15_40:
+; RV32I-NEXT: bgeu s3, t4, .LBB15_39
+; RV32I-NEXT: .LBB15_37:
; RV32I-NEXT: sll s1, t1, s1
; RV32I-NEXT: neg a1, s3
; RV32I-NEXT: srl a1, t1, a1
; RV32I-NEXT: or a1, a1, s2
-; RV32I-NEXT: j .LBB15_43
-; RV32I-NEXT: .LBB15_41:
+; RV32I-NEXT: j .LBB15_40
+; RV32I-NEXT: .LBB15_38:
; RV32I-NEXT: srl a1, a5, s10
; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s3, t4, .LBB15_40
-; RV32I-NEXT: .LBB15_42:
+; RV32I-NEXT: bltu s3, t4, .LBB15_37
+; RV32I-NEXT: .LBB15_39:
; RV32I-NEXT: li s1, 0
; RV32I-NEXT: sll a1, t1, s3
-; RV32I-NEXT: .LBB15_43:
+; RV32I-NEXT: .LBB15_40:
; RV32I-NEXT: addi s4, ra, -64
; RV32I-NEXT: mv s2, t2
-; RV32I-NEXT: beqz s3, .LBB15_45
-; RV32I-NEXT: # %bb.44:
+; RV32I-NEXT: beqz s3, .LBB15_42
+; RV32I-NEXT: # %bb.41:
; RV32I-NEXT: mv s2, a1
-; RV32I-NEXT: .LBB15_45:
+; RV32I-NEXT: .LBB15_42:
; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s5, a7
-; RV32I-NEXT: bltu s4, t4, .LBB15_47
-; RV32I-NEXT: # %bb.46:
+; RV32I-NEXT: bltu s4, t4, .LBB15_44
+; RV32I-NEXT: # %bb.43:
; RV32I-NEXT: srl t0, t2, s4
-; RV32I-NEXT: j .LBB15_48
-; RV32I-NEXT: .LBB15_47:
+; RV32I-NEXT: j .LBB15_45
+; RV32I-NEXT: .LBB15_44:
; RV32I-NEXT: srl a1, t1, ra
; RV32I-NEXT: neg t0, s4
; RV32I-NEXT: sll t0, t2, t0
; RV32I-NEXT: or t0, a1, t0
-; RV32I-NEXT: .LBB15_48:
+; RV32I-NEXT: .LBB15_45:
; RV32I-NEXT: mv s0, s10
; RV32I-NEXT: mv a7, a6
; RV32I-NEXT: lbu s8, 19(a0)
; RV32I-NEXT: lbu a1, 23(a0)
; RV32I-NEXT: mv s3, t1
-; RV32I-NEXT: beqz s4, .LBB15_50
-; RV32I-NEXT: # %bb.49:
+; RV32I-NEXT: beqz s4, .LBB15_47
+; RV32I-NEXT: # %bb.46:
; RV32I-NEXT: mv s3, t0
-; RV32I-NEXT: .LBB15_50:
+; RV32I-NEXT: .LBB15_47:
; RV32I-NEXT: mv a6, a3
; RV32I-NEXT: lbu s10, 17(a0)
; RV32I-NEXT: lbu t0, 18(a0)
@@ -5974,25 +6001,25 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli s8, s8, 8
; RV32I-NEXT: slli a1, a1, 8
; RV32I-NEXT: li a3, 64
-; RV32I-NEXT: bltu s4, t4, .LBB15_52
-; RV32I-NEXT: # %bb.51:
+; RV32I-NEXT: bltu s4, t4, .LBB15_49
+; RV32I-NEXT: # %bb.48:
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: j .LBB15_53
-; RV32I-NEXT: .LBB15_52:
+; RV32I-NEXT: j .LBB15_50
+; RV32I-NEXT: .LBB15_49:
; RV32I-NEXT: srl s4, t2, ra
-; RV32I-NEXT: .LBB15_53:
+; RV32I-NEXT: .LBB15_50:
; RV32I-NEXT: or s11, s8, t0
; RV32I-NEXT: lbu t0, 16(a0)
; RV32I-NEXT: lbu s8, 20(a0)
; RV32I-NEXT: slli s10, s10, 8
; RV32I-NEXT: slli s9, s9, 8
; RV32I-NEXT: or t6, a1, t6
-; RV32I-NEXT: bgeu ra, a3, .LBB15_55
-; RV32I-NEXT: # %bb.54:
+; RV32I-NEXT: bgeu ra, a3, .LBB15_52
+; RV32I-NEXT: # %bb.51:
; RV32I-NEXT: or s3, t5, s1
; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s4, a1, s2
-; RV32I-NEXT: .LBB15_55:
+; RV32I-NEXT: .LBB15_52:
; RV32I-NEXT: or a1, s10, t0
; RV32I-NEXT: slli s11, s11, 16
; RV32I-NEXT: or t0, s9, s8
@@ -6000,58 +6027,58 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: mv t5, t3
; RV32I-NEXT: mv s1, a5
; RV32I-NEXT: mv a3, a6
-; RV32I-NEXT: beqz ra, .LBB15_57
-; RV32I-NEXT: # %bb.56:
+; RV32I-NEXT: beqz ra, .LBB15_54
+; RV32I-NEXT: # %bb.53:
; RV32I-NEXT: mv t5, s3
; RV32I-NEXT: mv s1, s4
-; RV32I-NEXT: .LBB15_57:
+; RV32I-NEXT: .LBB15_54:
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: or s2, s11, a1
; RV32I-NEXT: or s1, t6, t0
; RV32I-NEXT: li a1, 64
; RV32I-NEXT: mv a6, a7
; RV32I-NEXT: mv a7, s0
-; RV32I-NEXT: bltu ra, a1, .LBB15_59
-; RV32I-NEXT: # %bb.58:
+; RV32I-NEXT: bltu ra, a1, .LBB15_56
+; RV32I-NEXT: # %bb.55:
; RV32I-NEXT: sw zero, 40(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .LBB15_59:
+; RV32I-NEXT: .LBB15_56:
; RV32I-NEXT: srl s3, s2, a7
; RV32I-NEXT: sll ra, s1, a4
; RV32I-NEXT: mv a7, s5
; RV32I-NEXT: sw t5, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu a4, t4, .LBB15_61
-; RV32I-NEXT: # %bb.60:
+; RV32I-NEXT: bltu a4, t4, .LBB15_58
+; RV32I-NEXT: # %bb.57:
; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: sll a1, s2, a4
-; RV32I-NEXT: j .LBB15_62
-; RV32I-NEXT: .LBB15_61:
+; RV32I-NEXT: j .LBB15_59
+; RV32I-NEXT: .LBB15_58:
; RV32I-NEXT: sll a1, s2, a4
; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: or a1, s3, ra
-; RV32I-NEXT: .LBB15_62:
+; RV32I-NEXT: .LBB15_59:
; RV32I-NEXT: lbu s9, 27(a0)
; RV32I-NEXT: lbu t6, 31(a0)
; RV32I-NEXT: mv t5, s1
-; RV32I-NEXT: beqz a4, .LBB15_64
-; RV32I-NEXT: # %bb.63:
+; RV32I-NEXT: beqz a4, .LBB15_61
+; RV32I-NEXT: # %bb.60:
; RV32I-NEXT: mv t5, a1
-; RV32I-NEXT: .LBB15_64:
+; RV32I-NEXT: .LBB15_61:
; RV32I-NEXT: lbu s8, 25(a0)
; RV32I-NEXT: lbu s4, 26(a0)
; RV32I-NEXT: lbu s11, 29(a0)
; RV32I-NEXT: lbu s10, 30(a0)
; RV32I-NEXT: slli s9, s9, 8
; RV32I-NEXT: slli t6, t6, 8
-; RV32I-NEXT: bltu s6, t4, .LBB15_66
-; RV32I-NEXT: # %bb.65:
+; RV32I-NEXT: bltu s6, t4, .LBB15_63
+; RV32I-NEXT: # %bb.62:
; RV32I-NEXT: srl t0, s1, s6
-; RV32I-NEXT: j .LBB15_67
-; RV32I-NEXT: .LBB15_66:
+; RV32I-NEXT: j .LBB15_64
+; RV32I-NEXT: .LBB15_63:
; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: sll a1, s1, a1
; RV32I-NEXT: or t0, s3, a1
-; RV32I-NEXT: .LBB15_67:
+; RV32I-NEXT: .LBB15_64:
; RV32I-NEXT: slli s8, s8, 8
; RV32I-NEXT: lbu s3, 24(a0)
; RV32I-NEXT: lbu a1, 28(a0)
@@ -6059,170 +6086,174 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli s11, s11, 8
; RV32I-NEXT: or t6, t6, s10
; RV32I-NEXT: mv s9, s2
-; RV32I-NEXT: beqz s6, .LBB15_69
-; RV32I-NEXT: # %bb.68:
+; RV32I-NEXT: beqz s6, .LBB15_66
+; RV32I-NEXT: # %bb.65:
; RV32I-NEXT: mv s9, t0
-; RV32I-NEXT: .LBB15_69:
+; RV32I-NEXT: .LBB15_66:
; RV32I-NEXT: or a0, s8, s3
; RV32I-NEXT: slli t0, s4, 16
; RV32I-NEXT: or a1, s11, a1
; RV32I-NEXT: slli t6, t6, 16
-; RV32I-NEXT: bltu s6, t4, .LBB15_71
-; RV32I-NEXT: # %bb.70:
+; RV32I-NEXT: bltu s6, t4, .LBB15_68
+; RV32I-NEXT: # %bb.67:
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: j .LBB15_72
-; RV32I-NEXT: .LBB15_71:
+; RV32I-NEXT: j .LBB15_69
+; RV32I-NEXT: .LBB15_68:
; RV32I-NEXT: srl s4, s1, s0
-; RV32I-NEXT: .LBB15_72:
+; RV32I-NEXT: .LBB15_69:
; RV32I-NEXT: li s11, 64
; RV32I-NEXT: or s6, t0, a0
; RV32I-NEXT: or a0, t6, a1
-; RV32I-NEXT: bltu a4, t4, .LBB15_74
-; RV32I-NEXT: # %bb.73:
+; RV32I-NEXT: bltu a4, t4, .LBB15_71
+; RV32I-NEXT: # %bb.70:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a1, s6, a4
-; RV32I-NEXT: j .LBB15_75
-; RV32I-NEXT: .LBB15_74:
+; RV32I-NEXT: mv s10, a0
+; RV32I-NEXT: bnez a4, .LBB15_72
+; RV32I-NEXT: j .LBB15_73
+; RV32I-NEXT: .LBB15_71:
; RV32I-NEXT: sll s3, s6, a4
; RV32I-NEXT: srl a1, s6, s0
; RV32I-NEXT: sll t0, a0, a4
; RV32I-NEXT: or a1, a1, t0
-; RV32I-NEXT: .LBB15_75:
; RV32I-NEXT: mv s10, a0
-; RV32I-NEXT: beqz a4, .LBB15_77
-; RV32I-NEXT: # %bb.76:
+; RV32I-NEXT: beqz a4, .LBB15_73
+; RV32I-NEXT: .LBB15_72:
; RV32I-NEXT: mv s10, a1
-; RV32I-NEXT: .LBB15_77:
-; RV32I-NEXT: bltu s7, t4, .LBB15_79
-; RV32I-NEXT: # %bb.78:
+; RV32I-NEXT: .LBB15_73:
+; RV32I-NEXT: bltu s7, t4, .LBB15_75
+; RV32I-NEXT: # %bb.74:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: sll a1, s2, s7
-; RV32I-NEXT: j .LBB15_80
-; RV32I-NEXT: .LBB15_79:
+; RV32I-NEXT: mv s0, s1
+; RV32I-NEXT: bnez s7, .LBB15_76
+; RV32I-NEXT: j .LBB15_77
+; RV32I-NEXT: .LBB15_75:
; RV32I-NEXT: sll s5, s2, a4
; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: srl a1, s2, a1
; RV32I-NEXT: or a1, a1, ra
-; RV32I-NEXT: .LBB15_80:
; RV32I-NEXT: mv s0, s1
-; RV32I-NEXT: beqz s7, .LBB15_82
-; RV32I-NEXT: # %bb.81:
+; RV32I-NEXT: beqz s7, .LBB15_77
+; RV32I-NEXT: .LBB15_76:
; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: .LBB15_82:
-; RV32I-NEXT: bltu a4, s11, .LBB15_84
-; RV32I-NEXT: # %bb.83:
+; RV32I-NEXT: .LBB15_77:
+; RV32I-NEXT: bltu a4, s11, .LBB15_79
+; RV32I-NEXT: # %bb.78:
; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: li t5, 0
-; RV32I-NEXT: j .LBB15_85
-; RV32I-NEXT: .LBB15_84:
+; RV32I-NEXT: j .LBB15_80
+; RV32I-NEXT: .LBB15_79:
; RV32I-NEXT: or s5, s9, s3
; RV32I-NEXT: or s0, s4, s10
-; RV32I-NEXT: .LBB15_85:
+; RV32I-NEXT: .LBB15_80:
; RV32I-NEXT: addi s9, a4, -128
; RV32I-NEXT: mv s7, s6
; RV32I-NEXT: mv s8, a0
-; RV32I-NEXT: beqz a4, .LBB15_87
-; RV32I-NEXT: # %bb.86:
+; RV32I-NEXT: beqz a4, .LBB15_82
+; RV32I-NEXT: # %bb.81:
; RV32I-NEXT: mv s7, s5
; RV32I-NEXT: mv s8, s0
-; RV32I-NEXT: .LBB15_87:
+; RV32I-NEXT: .LBB15_82:
; RV32I-NEXT: neg s3, s9
; RV32I-NEXT: srl s0, t3, s3
-; RV32I-NEXT: bltu s9, t4, .LBB15_89
-; RV32I-NEXT: # %bb.88:
+; RV32I-NEXT: bltu s9, t4, .LBB15_84
+; RV32I-NEXT: # %bb.83:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: sll a1, t3, s9
-; RV32I-NEXT: j .LBB15_90
-; RV32I-NEXT: .LBB15_89:
+; RV32I-NEXT: j .LBB15_85
+; RV32I-NEXT: .LBB15_84:
; RV32I-NEXT: sll s5, t3, a4
; RV32I-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a1, s0, a1
-; RV32I-NEXT: .LBB15_90:
+; RV32I-NEXT: .LBB15_85:
; RV32I-NEXT: sub s4, s11, s9
; RV32I-NEXT: mv t6, a5
-; RV32I-NEXT: beqz s9, .LBB15_92
-; RV32I-NEXT: # %bb.91:
+; RV32I-NEXT: beqz s9, .LBB15_87
+; RV32I-NEXT: # %bb.86:
; RV32I-NEXT: mv t6, a1
-; RV32I-NEXT: .LBB15_92:
-; RV32I-NEXT: bltu s4, t4, .LBB15_94
-; RV32I-NEXT: # %bb.93:
+; RV32I-NEXT: .LBB15_87:
+; RV32I-NEXT: bltu s4, t4, .LBB15_89
+; RV32I-NEXT: # %bb.88:
; RV32I-NEXT: srl a1, a5, s4
-; RV32I-NEXT: j .LBB15_95
-; RV32I-NEXT: .LBB15_94:
+; RV32I-NEXT: mv s0, t3
+; RV32I-NEXT: bnez s4, .LBB15_90
+; RV32I-NEXT: j .LBB15_91
+; RV32I-NEXT: .LBB15_89:
; RV32I-NEXT: neg a1, s4
; RV32I-NEXT: sll a1, a5, a1
; RV32I-NEXT: or a1, s0, a1
-; RV32I-NEXT: .LBB15_95:
; RV32I-NEXT: mv s0, t3
-; RV32I-NEXT: beqz s4, .LBB15_97
-; RV32I-NEXT: # %bb.96:
+; RV32I-NEXT: beqz s4, .LBB15_91
+; RV32I-NEXT: .LBB15_90:
; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: .LBB15_97:
-; RV32I-NEXT: bltu s4, t4, .LBB15_101
-; RV32I-NEXT: # %bb.98:
+; RV32I-NEXT: .LBB15_91:
+; RV32I-NEXT: bltu s4, t4, .LBB15_94
+; RV32I-NEXT: # %bb.92:
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: .LBB15_99:
; RV32I-NEXT: li ra, 64
-; RV32I-NEXT: bgeu s9, t4, .LBB15_102
-; RV32I-NEXT: # %bb.100:
+; RV32I-NEXT: bgeu s9, t4, .LBB15_95
+; RV32I-NEXT: .LBB15_93:
; RV32I-NEXT: sll s10, t1, a4
; RV32I-NEXT: srl a1, t1, s3
; RV32I-NEXT: lw t0, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a1, a1, t0
-; RV32I-NEXT: j .LBB15_103
-; RV32I-NEXT: .LBB15_101:
+; RV32I-NEXT: j .LBB15_96
+; RV32I-NEXT: .LBB15_94:
; RV32I-NEXT: srl s4, a5, s3
-; RV32I-NEXT: j .LBB15_99
-; RV32I-NEXT: .LBB15_102:
+; RV32I-NEXT: li ra, 64
+; RV32I-NEXT: bltu s9, t4, .LBB15_93
+; RV32I-NEXT: .LBB15_95:
; RV32I-NEXT: li s10, 0
; RV32I-NEXT: sll a1, t1, s9
-; RV32I-NEXT: .LBB15_103:
+; RV32I-NEXT: .LBB15_96:
; RV32I-NEXT: addi s11, s9, -64
; RV32I-NEXT: mv s3, t2
-; RV32I-NEXT: beqz s9, .LBB15_105
-; RV32I-NEXT: # %bb.104:
+; RV32I-NEXT: beqz s9, .LBB15_98
+; RV32I-NEXT: # %bb.97:
; RV32I-NEXT: mv s3, a1
-; RV32I-NEXT: .LBB15_105:
-; RV32I-NEXT: bltu s11, t4, .LBB15_107
-; RV32I-NEXT: # %bb.106:
+; RV32I-NEXT: .LBB15_98:
+; RV32I-NEXT: bltu s11, t4, .LBB15_100
+; RV32I-NEXT: # %bb.99:
; RV32I-NEXT: li t4, 0
; RV32I-NEXT: sll a1, t3, s11
-; RV32I-NEXT: bnez s11, .LBB15_108
-; RV32I-NEXT: j .LBB15_109
-; RV32I-NEXT: .LBB15_107:
+; RV32I-NEXT: bnez s11, .LBB15_101
+; RV32I-NEXT: j .LBB15_102
+; RV32I-NEXT: .LBB15_100:
; RV32I-NEXT: sll t4, t3, s9
; RV32I-NEXT: neg a1, s11
; RV32I-NEXT: srl a1, t3, a1
; RV32I-NEXT: sll t0, a5, s9
; RV32I-NEXT: or a1, a1, t0
-; RV32I-NEXT: beqz s11, .LBB15_109
-; RV32I-NEXT: .LBB15_108:
+; RV32I-NEXT: beqz s11, .LBB15_102
+; RV32I-NEXT: .LBB15_101:
; RV32I-NEXT: mv a5, a1
-; RV32I-NEXT: .LBB15_109:
-; RV32I-NEXT: bltu s9, ra, .LBB15_111
-; RV32I-NEXT: # %bb.110:
+; RV32I-NEXT: .LBB15_102:
+; RV32I-NEXT: bltu s9, ra, .LBB15_104
+; RV32I-NEXT: # %bb.103:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: li t6, 0
-; RV32I-NEXT: j .LBB15_112
-; RV32I-NEXT: .LBB15_111:
+; RV32I-NEXT: li a1, 128
+; RV32I-NEXT: bnez s9, .LBB15_105
+; RV32I-NEXT: j .LBB15_106
+; RV32I-NEXT: .LBB15_104:
; RV32I-NEXT: or t4, s0, s10
; RV32I-NEXT: or a5, s4, s3
-; RV32I-NEXT: .LBB15_112:
; RV32I-NEXT: li a1, 128
-; RV32I-NEXT: beqz s9, .LBB15_114
-; RV32I-NEXT: # %bb.113:
+; RV32I-NEXT: beqz s9, .LBB15_106
+; RV32I-NEXT: .LBB15_105:
; RV32I-NEXT: mv t1, t4
; RV32I-NEXT: mv t2, a5
-; RV32I-NEXT: .LBB15_114:
-; RV32I-NEXT: bltu a4, a1, .LBB15_116
-; RV32I-NEXT: # %bb.115:
+; RV32I-NEXT: .LBB15_106:
+; RV32I-NEXT: bltu a4, a1, .LBB15_108
+; RV32I-NEXT: # %bb.107:
; RV32I-NEXT: li ra, 0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a7, 0
; RV32I-NEXT: li a6, 0
-; RV32I-NEXT: bnez a4, .LBB15_117
-; RV32I-NEXT: j .LBB15_118
-; RV32I-NEXT: .LBB15_116:
+; RV32I-NEXT: bnez a4, .LBB15_109
+; RV32I-NEXT: j .LBB15_110
+; RV32I-NEXT: .LBB15_108:
; RV32I-NEXT: lw a1, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s5, a1, a5
@@ -6233,13 +6264,13 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: or t2, a1, s8
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: beqz a4, .LBB15_118
-; RV32I-NEXT: .LBB15_117:
+; RV32I-NEXT: beqz a4, .LBB15_110
+; RV32I-NEXT: .LBB15_109:
; RV32I-NEXT: mv s2, s5
; RV32I-NEXT: mv s1, t6
; RV32I-NEXT: mv s6, t1
; RV32I-NEXT: mv a0, t2
-; RV32I-NEXT: .LBB15_118:
+; RV32I-NEXT: .LBB15_110:
; RV32I-NEXT: srli a4, ra, 16
; RV32I-NEXT: lui t2, 16
; RV32I-NEXT: srli t1, ra, 24
@@ -6767,112 +6798,115 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV32I-NEXT: # %bb.19:
; RV32I-NEXT: li s2, 0
; RV32I-NEXT: sll a1, t3, s7
-; RV32I-NEXT: j .LBB16_21
+; RV32I-NEXT: mv s4, a5
+; RV32I-NEXT: bnez s7, .LBB16_21
+; RV32I-NEXT: j .LBB16_22
; RV32I-NEXT: .LBB16_20:
; RV32I-NEXT: sll s2, t3, a4
; RV32I-NEXT: srl a1, t3, a1
; RV32I-NEXT: or a1, a1, s5
-; RV32I-NEXT: .LBB16_21:
; RV32I-NEXT: mv s4, a5
-; RV32I-NEXT: beqz s7, .LBB16_23
-; RV32I-NEXT: # %bb.22:
+; RV32I-NEXT: beqz s7, .LBB16_22
+; RV32I-NEXT: .LBB16_21:
; RV32I-NEXT: mv s4, a1
-; RV32I-NEXT: .LBB16_23:
+; RV32I-NEXT: .LBB16_22:
; RV32I-NEXT: li a1, 128
-; RV32I-NEXT: bltu a4, s9, .LBB16_25
-; RV32I-NEXT: # %bb.24:
+; RV32I-NEXT: bltu a4, s9, .LBB16_24
+; RV32I-NEXT: # %bb.23:
; RV32I-NEXT: sw zero, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: li a3, 0
-; RV32I-NEXT: j .LBB16_26
-; RV32I-NEXT: .LBB16_25:
+; RV32I-NEXT: j .LBB16_25
+; RV32I-NEXT: .LBB16_24:
; RV32I-NEXT: sw s8, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: or s2, a6, s1
; RV32I-NEXT: or s4, a7, s3
-; RV32I-NEXT: .LBB16_26:
+; RV32I-NEXT: .LBB16_25:
; RV32I-NEXT: sub ra, a1, a4
; RV32I-NEXT: mv a7, t1
; RV32I-NEXT: mv a6, t2
-; RV32I-NEXT: beqz a4, .LBB16_28
-; RV32I-NEXT: # %bb.27:
+; RV32I-NEXT: beqz a4, .LBB16_27
+; RV32I-NEXT: # %bb.26:
; RV32I-NEXT: mv a7, s2
; RV32I-NEXT: mv a6, s4
-; RV32I-NEXT: .LBB16_28:
+; RV32I-NEXT: .LBB16_27:
; RV32I-NEXT: neg s1, ra
; RV32I-NEXT: sll s2, t2, s1
-; RV32I-NEXT: bltu ra, t4, .LBB16_30
-; RV32I-NEXT: # %bb.29:
+; RV32I-NEXT: bltu ra, t4, .LBB16_29
+; RV32I-NEXT: # %bb.28:
; RV32I-NEXT: srl a1, t2, ra
+; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: bnez ra, .LBB16_30
; RV32I-NEXT: j .LBB16_31
-; RV32I-NEXT: .LBB16_30:
+; RV32I-NEXT: .LBB16_29:
; RV32I-NEXT: or a1, s0, s2
-; RV32I-NEXT: .LBB16_31:
; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: beqz ra, .LBB16_33
-; RV32I-NEXT: # %bb.32:
+; RV32I-NEXT: beqz ra, .LBB16_31
+; RV32I-NEXT: .LBB16_30:
; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .LBB16_33:
-; RV32I-NEXT: bltu ra, t4, .LBB16_35
-; RV32I-NEXT: # %bb.34:
+; RV32I-NEXT: .LBB16_31:
+; RV32I-NEXT: bltu ra, t4, .LBB16_33
+; RV32I-NEXT: # %bb.32:
; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT: srl a1, a5, ra
-; RV32I-NEXT: j .LBB16_36
-; RV32I-NEXT: .LBB16_35:
+; RV32I-NEXT: mv t5, t3
+; RV32I-NEXT: bnez ra, .LBB16_34
+; RV32I-NEXT: j .LBB16_35
+; RV32I-NEXT: .LBB16_33:
; RV32I-NEXT: srl a1, t2, s10
; RV32I-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT: sll a1, a5, s1
; RV32I-NEXT: or a1, t5, a1
-; RV32I-NEXT: .LBB16_36:
; RV32I-NEXT: mv t5, t3
-; RV32I-NEXT: beqz ra, .LBB16_38
-; RV32I-NEXT: # %bb.37:
+; RV32I-NEXT: beqz ra, .LBB16_35
+; RV32I-NEXT: .LBB16_34:
; RV32I-NEXT: mv t5, a1
-; RV32I-NEXT: .LBB16_38:
+; RV32I-NEXT: .LBB16_35:
; RV32I-NEXT: sub s3, s9, ra
-; RV32I-NEXT: bltu ra, t4, .LBB16_41
-; RV32I-NEXT: # %bb.39:
+; RV32I-NEXT: bltu ra, t4, .LBB16_38
+; RV32I-NEXT: # %bb.36:
; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bgeu s3, t4, .LBB16_42
-; RV32I-NEXT: .LBB16_40:
+; RV32I-NEXT: bgeu s3, t4, .LBB16_39
+; RV32I-NEXT: .LBB16_37:
; RV32I-NEXT: sll s1, t1, s1
; RV32I-NEXT: neg a1, s3
; RV32I-NEXT: srl a1, t1, a1
; RV32I-NEXT: or a1, a1, s2
-; RV32I-NEXT: j .LBB16_43
-; RV32I-NEXT: .LBB16_41:
+; RV32I-NEXT: j .LBB16_40
+; RV32I-NEXT: .LBB16_38:
; RV32I-NEXT: srl a1, a5, s10
; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s3, t4, .LBB16_40
-; RV32I-NEXT: .LBB16_42:
+; RV32I-NEXT: bltu s3, t4, .LBB16_37
+; RV32I-NEXT: .LBB16_39:
; RV32I-NEXT: li s1, 0
; RV32I-NEXT: sll a1, t1, s3
-; RV32I-NEXT: .LBB16_43:
+; RV32I-NEXT: .LBB16_40:
; RV32I-NEXT: addi s4, ra, -64
; RV32I-NEXT: mv s2, t2
-; RV32I-NEXT: beqz s3, .LBB16_45
-; RV32I-NEXT: # %bb.44:
+; RV32I-NEXT: beqz s3, .LBB16_42
+; RV32I-NEXT: # %bb.41:
; RV32I-NEXT: mv s2, a1
-; RV32I-NEXT: .LBB16_45:
+; RV32I-NEXT: .LBB16_42:
; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s5, a7
-; RV32I-NEXT: bltu s4, t4, .LBB16_47
-; RV32I-NEXT: # %bb.46:
+; RV32I-NEXT: bltu s4, t4, .LBB16_44
+; RV32I-NEXT: # %bb.43:
; RV32I-NEXT: srl t0, t2, s4
-; RV32I-NEXT: j .LBB16_48
-; RV32I-NEXT: .LBB16_47:
+; RV32I-NEXT: j .LBB16_45
+; RV32I-NEXT: .LBB16_44:
; RV32I-NEXT: srl a1, t1, ra
; RV32I-NEXT: neg t0, s4
; RV32I-NEXT: sll t0, t2, t0
; RV32I-NEXT: or t0, a1, t0
-; RV32I-NEXT: .LBB16_48:
+; RV32I-NEXT: .LBB16_45:
; RV32I-NEXT: mv s0, s10
; RV32I-NEXT: mv a7, a6
; RV32I-NEXT: lbu s8, 19(a0)
; RV32I-NEXT: lbu a1, 23(a0)
; RV32I-NEXT: mv s3, t1
-; RV32I-NEXT: beqz s4, .LBB16_50
-; RV32I-NEXT: # %bb.49:
+; RV32I-NEXT: beqz s4, .LBB16_47
+; RV32I-NEXT: # %bb.46:
; RV32I-NEXT: mv s3, t0
-; RV32I-NEXT: .LBB16_50:
+; RV32I-NEXT: .LBB16_47:
; RV32I-NEXT: mv a6, a3
; RV32I-NEXT: lbu s10, 17(a0)
; RV32I-NEXT: lbu t0, 18(a0)
@@ -6881,25 +6915,25 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV32I-NEXT: slli s8, s8, 8
; RV32I-NEXT: slli a1, a1, 8
; RV32I-NEXT: li a3, 64
-; RV32I-NEXT: bltu s4, t4, .LBB16_52
-; RV32I-NEXT: # %bb.51:
+; RV32I-NEXT: bltu s4, t4, .LBB16_49
+; RV32I-NEXT: # %bb.48:
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: j .LBB16_53
-; RV32I-NEXT: .LBB16_52:
+; RV32I-NEXT: j .LBB16_50
+; RV32I-NEXT: .LBB16_49:
; RV32I-NEXT: srl s4, t2, ra
-; RV32I-NEXT: .LBB16_53:
+; RV32I-NEXT: .LBB16_50:
; RV32I-NEXT: or s11, s8, t0
; RV32I-NEXT: lbu t0, 16(a0)
; RV32I-NEXT: lbu s8, 20(a0)
; RV32I-NEXT: slli s10, s10, 8
; RV32I-NEXT: slli s9, s9, 8
; RV32I-NEXT: or t6, a1, t6
-; RV32I-NEXT: bgeu ra, a3, .LBB16_55
-; RV32I-NEXT: # %bb.54:
+; RV32I-NEXT: bgeu ra, a3, .LBB16_52
+; RV32I-NEXT: # %bb.51:
; RV32I-NEXT: or s3, t5, s1
; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s4, a1, s2
-; RV32I-NEXT: .LBB16_55:
+; RV32I-NEXT: .LBB16_52:
; RV32I-NEXT: or a1, s10, t0
; RV32I-NEXT: slli s11, s11, 16
; RV32I-NEXT: or t0, s9, s8
@@ -6907,58 +6941,58 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV32I-NEXT: mv t5, t3
; RV32I-NEXT: mv s1, a5
; RV32I-NEXT: mv a3, a6
-; RV32I-NEXT: beqz ra, .LBB16_57
-; RV32I-NEXT: # %bb.56:
+; RV32I-NEXT: beqz ra, .LBB16_54
+; RV32I-NEXT: # %bb.53:
; RV32I-NEXT: mv t5, s3
; RV32I-NEXT: mv s1, s4
-; RV32I-NEXT: .LBB16_57:
+; RV32I-NEXT: .LBB16_54:
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: or s2, s11, a1
; RV32I-NEXT: or s1, t6, t0
; RV32I-NEXT: li a1, 64
; RV32I-NEXT: mv a6, a7
; RV32I-NEXT: mv a7, s0
-; RV32I-NEXT: bltu ra, a1, .LBB16_59
-; RV32I-NEXT: # %bb.58:
+; RV32I-NEXT: bltu ra, a1, .LBB16_56
+; RV32I-NEXT: # %bb.55:
; RV32I-NEXT: sw zero, 40(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .LBB16_59:
+; RV32I-NEXT: .LBB16_56:
; RV32I-NEXT: srl s3, s2, a7
; RV32I-NEXT: sll ra, s1, a4
; RV32I-NEXT: mv a7, s5
; RV32I-NEXT: sw t5, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu a4, t4, .LBB16_61
-; RV32I-NEXT: # %bb.60:
+; RV32I-NEXT: bltu a4, t4, .LBB16_58
+; RV32I-NEXT: # %bb.57:
; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: sll a1, s2, a4
-; RV32I-NEXT: j .LBB16_62
-; RV32I-NEXT: .LBB16_61:
+; RV32I-NEXT: j .LBB16_59
+; RV32I-NEXT: .LBB16_58:
; RV32I-NEXT: sll a1, s2, a4
; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: or a1, s3, ra
-; RV32I-NEXT: .LBB16_62:
+; RV32I-NEXT: .LBB16_59:
; RV32I-NEXT: lbu s9, 27(a0)
; RV32I-NEXT: lbu t6, 31(a0)
; RV32I-NEXT: mv t5, s1
-; RV32I-NEXT: beqz a4, .LBB16_64
-; RV32I-NEXT: # %bb.63:
+; RV32I-NEXT: beqz a4, .LBB16_61
+; RV32I-NEXT: # %bb.60:
; RV32I-NEXT: mv t5, a1
-; RV32I-NEXT: .LBB16_64:
+; RV32I-NEXT: .LBB16_61:
; RV32I-NEXT: lbu s8, 25(a0)
; RV32I-NEXT: lbu s4, 26(a0)
; RV32I-NEXT: lbu s11, 29(a0)
; RV32I-NEXT: lbu s10, 30(a0)
; RV32I-NEXT: slli s9, s9, 8
; RV32I-NEXT: slli t6, t6, 8
-; RV32I-NEXT: bltu s6, t4, .LBB16_66
-; RV32I-NEXT: # %bb.65:
+; RV32I-NEXT: bltu s6, t4, .LBB16_63
+; RV32I-NEXT: # %bb.62:
; RV32I-NEXT: srl t0, s1, s6
-; RV32I-NEXT: j .LBB16_67
-; RV32I-NEXT: .LBB16_66:
+; RV32I-NEXT: j .LBB16_64
+; RV32I-NEXT: .LBB16_63:
; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: sll a1, s1, a1
; RV32I-NEXT: or t0, s3, a1
-; RV32I-NEXT: .LBB16_67:
+; RV32I-NEXT: .LBB16_64:
; RV32I-NEXT: slli s8, s8, 8
; RV32I-NEXT: lbu s3, 24(a0)
; RV32I-NEXT: lbu a1, 28(a0)
@@ -6966,170 +7000,174 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV32I-NEXT: slli s11, s11, 8
; RV32I-NEXT: or t6, t6, s10
; RV32I-NEXT: mv s9, s2
-; RV32I-NEXT: beqz s6, .LBB16_69
-; RV32I-NEXT: # %bb.68:
+; RV32I-NEXT: beqz s6, .LBB16_66
+; RV32I-NEXT: # %bb.65:
; RV32I-NEXT: mv s9, t0
-; RV32I-NEXT: .LBB16_69:
+; RV32I-NEXT: .LBB16_66:
; RV32I-NEXT: or a0, s8, s3
; RV32I-NEXT: slli t0, s4, 16
; RV32I-NEXT: or a1, s11, a1
; RV32I-NEXT: slli t6, t6, 16
-; RV32I-NEXT: bltu s6, t4, .LBB16_71
-; RV32I-NEXT: # %bb.70:
+; RV32I-NEXT: bltu s6, t4, .LBB16_68
+; RV32I-NEXT: # %bb.67:
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: j .LBB16_72
-; RV32I-NEXT: .LBB16_71:
+; RV32I-NEXT: j .LBB16_69
+; RV32I-NEXT: .LBB16_68:
; RV32I-NEXT: srl s4, s1, s0
-; RV32I-NEXT: .LBB16_72:
+; RV32I-NEXT: .LBB16_69:
; RV32I-NEXT: li s11, 64
; RV32I-NEXT: or s6, t0, a0
; RV32I-NEXT: or a0, t6, a1
-; RV32I-NEXT: bltu a4, t4, .LBB16_74
-; RV32I-NEXT: # %bb.73:
+; RV32I-NEXT: bltu a4, t4, .LBB16_71
+; RV32I-NEXT: # %bb.70:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a1, s6, a4
-; RV32I-NEXT: j .LBB16_75
-; RV32I-NEXT: .LBB16_74:
+; RV32I-NEXT: mv s10, a0
+; RV32I-NEXT: bnez a4, .LBB16_72
+; RV32I-NEXT: j .LBB16_73
+; RV32I-NEXT: .LBB16_71:
; RV32I-NEXT: sll s3, s6, a4
; RV32I-NEXT: srl a1, s6, s0
; RV32I-NEXT: sll t0, a0, a4
; RV32I-NEXT: or a1, a1, t0
-; RV32I-NEXT: .LBB16_75:
; RV32I-NEXT: mv s10, a0
-; RV32I-NEXT: beqz a4, .LBB16_77
-; RV32I-NEXT: # %bb.76:
+; RV32I-NEXT: beqz a4, .LBB16_73
+; RV32I-NEXT: .LBB16_72:
; RV32I-NEXT: mv s10, a1
-; RV32I-NEXT: .LBB16_77:
-; RV32I-NEXT: bltu s7, t4, .LBB16_79
-; RV32I-NEXT: # %bb.78:
+; RV32I-NEXT: .LBB16_73:
+; RV32I-NEXT: bltu s7, t4, .LBB16_75
+; RV32I-NEXT: # %bb.74:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: sll a1, s2, s7
-; RV32I-NEXT: j .LBB16_80
-; RV32I-NEXT: .LBB16_79:
+; RV32I-NEXT: mv s0, s1
+; RV32I-NEXT: bnez s7, .LBB16_76
+; RV32I-NEXT: j .LBB16_77
+; RV32I-NEXT: .LBB16_75:
; RV32I-NEXT: sll s5, s2, a4
; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: srl a1, s2, a1
; RV32I-NEXT: or a1, a1, ra
-; RV32I-NEXT: .LBB16_80:
; RV32I-NEXT: mv s0, s1
-; RV32I-NEXT: beqz s7, .LBB16_82
-; RV32I-NEXT: # %bb.81:
+; RV32I-NEXT: beqz s7, .LBB16_77
+; RV32I-NEXT: .LBB16_76:
; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: .LBB16_82:
-; RV32I-NEXT: bltu a4, s11, .LBB16_84
-; RV32I-NEXT: # %bb.83:
+; RV32I-NEXT: .LBB16_77:
+; RV32I-NEXT: bltu a4, s11, .LBB16_79
+; RV32I-NEXT: # %bb.78:
; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: li t5, 0
-; RV32I-NEXT: j .LBB16_85
-; RV32I-NEXT: .LBB16_84:
+; RV32I-NEXT: j .LBB16_80
+; RV32I-NEXT: .LBB16_79:
; RV32I-NEXT: or s5, s9, s3
; RV32I-NEXT: or s0, s4, s10
-; RV32I-NEXT: .LBB16_85:
+; RV32I-NEXT: .LBB16_80:
; RV32I-NEXT: addi s9, a4, -128
; RV32I-NEXT: mv s7, s6
; RV32I-NEXT: mv s8, a0
-; RV32I-NEXT: beqz a4, .LBB16_87
-; RV32I-NEXT: # %bb.86:
+; RV32I-NEXT: beqz a4, .LBB16_82
+; RV32I-NEXT: # %bb.81:
; RV32I-NEXT: mv s7, s5
; RV32I-NEXT: mv s8, s0
-; RV32I-NEXT: .LBB16_87:
+; RV32I-NEXT: .LBB16_82:
; RV32I-NEXT: neg s3, s9
; RV32I-NEXT: srl s0, t3, s3
-; RV32I-NEXT: bltu s9, t4, .LBB16_89
-; RV32I-NEXT: # %bb.88:
+; RV32I-NEXT: bltu s9, t4, .LBB16_84
+; RV32I-NEXT: # %bb.83:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: sll a1, t3, s9
-; RV32I-NEXT: j .LBB16_90
-; RV32I-NEXT: .LBB16_89:
+; RV32I-NEXT: j .LBB16_85
+; RV32I-NEXT: .LBB16_84:
; RV32I-NEXT: sll s5, t3, a4
; RV32I-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a1, s0, a1
-; RV32I-NEXT: .LBB16_90:
+; RV32I-NEXT: .LBB16_85:
; RV32I-NEXT: sub s4, s11, s9
; RV32I-NEXT: mv t6, a5
-; RV32I-NEXT: beqz s9, .LBB16_92
-; RV32I-NEXT: # %bb.91:
+; RV32I-NEXT: beqz s9, .LBB16_87
+; RV32I-NEXT: # %bb.86:
; RV32I-NEXT: mv t6, a1
-; RV32I-NEXT: .LBB16_92:
-; RV32I-NEXT: bltu s4, t4, .LBB16_94
-; RV32I-NEXT: # %bb.93:
+; RV32I-NEXT: .LBB16_87:
+; RV32I-NEXT: bltu s4, t4, .LBB16_89
+; RV32I-NEXT: # %bb.88:
; RV32I-NEXT: srl a1, a5, s4
-; RV32I-NEXT: j .LBB16_95
-; RV32I-NEXT: .LBB16_94:
+; RV32I-NEXT: mv s0, t3
+; RV32I-NEXT: bnez s4, .LBB16_90
+; RV32I-NEXT: j .LBB16_91
+; RV32I-NEXT: .LBB16_89:
; RV32I-NEXT: neg a1, s4
; RV32I-NEXT: sll a1, a5, a1
; RV32I-NEXT: or a1, s0, a1
-; RV32I-NEXT: .LBB16_95:
; RV32I-NEXT: mv s0, t3
-; RV32I-NEXT: beqz s4, .LBB16_97
-; RV32I-NEXT: # %bb.96:
+; RV32I-NEXT: beqz s4, .LBB16_91
+; RV32I-NEXT: .LBB16_90:
; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: .LBB16_97:
-; RV32I-NEXT: bltu s4, t4, .LBB16_101
-; RV32I-NEXT: # %bb.98:
+; RV32I-NEXT: .LBB16_91:
+; RV32I-NEXT: bltu s4, t4, .LBB16_94
+; RV32I-NEXT: # %bb.92:
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: .LBB16_99:
; RV32I-NEXT: li ra, 64
-; RV32I-NEXT: bgeu s9, t4, .LBB16_102
-; RV32I-NEXT: # %bb.100:
+; RV32I-NEXT: bgeu s9, t4, .LBB16_95
+; RV32I-NEXT: .LBB16_93:
; RV32I-NEXT: sll s10, t1, a4
; RV32I-NEXT: srl a1, t1, s3
; RV32I-NEXT: lw t0, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a1, a1, t0
-; RV32I-NEXT: j .LBB16_103
-; RV32I-NEXT: .LBB16_101:
+; RV32I-NEXT: j .LBB16_96
+; RV32I-NEXT: .LBB16_94:
; RV32I-NEXT: srl s4, a5, s3
-; RV32I-NEXT: j .LBB16_99
-; RV32I-NEXT: .LBB16_102:
+; RV32I-NEXT: li ra, 64
+; RV32I-NEXT: bltu s9, t4, .LBB16_93
+; RV32I-NEXT: .LBB16_95:
; RV32I-NEXT: li s10, 0
; RV32I-NEXT: sll a1, t1, s9
-; RV32I-NEXT: .LBB16_103:
+; RV32I-NEXT: .LBB16_96:
; RV32I-NEXT: addi s11, s9, -64
; RV32I-NEXT: mv s3, t2
-; RV32I-NEXT: beqz s9, .LBB16_105
-; RV32I-NEXT: # %bb.104:
+; RV32I-NEXT: beqz s9, .LBB16_98
+; RV32I-NEXT: # %bb.97:
; RV32I-NEXT: mv s3, a1
-; RV32I-NEXT: .LBB16_105:
-; RV32I-NEXT: bltu s11, t4, .LBB16_107
-; RV32I-NEXT: # %bb.106:
+; RV32I-NEXT: .LBB16_98:
+; RV32I-NEXT: bltu s11, t4, .LBB16_100
+; RV32I-NEXT: # %bb.99:
; RV32I-NEXT: li t4, 0
; RV32I-NEXT: sll a1, t3, s11
-; RV32I-NEXT: bnez s11, .LBB16_108
-; RV32I-NEXT: j .LBB16_109
-; RV32I-NEXT: .LBB16_107:
+; RV32I-NEXT: bnez s11, .LBB16_101
+; RV32I-NEXT: j .LBB16_102
+; RV32I-NEXT: .LBB16_100:
; RV32I-NEXT: sll t4, t3, s9
; RV32I-NEXT: neg a1, s11
; RV32I-NEXT: srl a1, t3, a1
; RV32I-NEXT: sll t0, a5, s9
; RV32I-NEXT: or a1, a1, t0
-; RV32I-NEXT: beqz s11, .LBB16_109
-; RV32I-NEXT: .LBB16_108:
+; RV32I-NEXT: beqz s11, .LBB16_102
+; RV32I-NEXT: .LBB16_101:
; RV32I-NEXT: mv a5, a1
-; RV32I-NEXT: .LBB16_109:
-; RV32I-NEXT: bltu s9, ra, .LBB16_111
-; RV32I-NEXT: # %bb.110:
+; RV32I-NEXT: .LBB16_102:
+; RV32I-NEXT: bltu s9, ra, .LBB16_104
+; RV32I-NEXT: # %bb.103:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: li t6, 0
-; RV32I-NEXT: j .LBB16_112
-; RV32I-NEXT: .LBB16_111:
+; RV32I-NEXT: li a1, 128
+; RV32I-NEXT: bnez s9, .LBB16_105
+; RV32I-NEXT: j .LBB16_106
+; RV32I-NEXT: .LBB16_104:
; RV32I-NEXT: or t4, s0, s10
; RV32I-NEXT: or a5, s4, s3
-; RV32I-NEXT: .LBB16_112:
; RV32I-NEXT: li a1, 128
-; RV32I-NEXT: beqz s9, .LBB16_114
-; RV32I-NEXT: # %bb.113:
+; RV32I-NEXT: beqz s9, .LBB16_106
+; RV32I-NEXT: .LBB16_105:
; RV32I-NEXT: mv t1, t4
; RV32I-NEXT: mv t2, a5
-; RV32I-NEXT: .LBB16_114:
-; RV32I-NEXT: bltu a4, a1, .LBB16_116
-; RV32I-NEXT: # %bb.115:
+; RV32I-NEXT: .LBB16_106:
+; RV32I-NEXT: bltu a4, a1, .LBB16_108
+; RV32I-NEXT: # %bb.107:
; RV32I-NEXT: li ra, 0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a7, 0
; RV32I-NEXT: li a6, 0
-; RV32I-NEXT: bnez a4, .LBB16_117
-; RV32I-NEXT: j .LBB16_118
-; RV32I-NEXT: .LBB16_116:
+; RV32I-NEXT: bnez a4, .LBB16_109
+; RV32I-NEXT: j .LBB16_110
+; RV32I-NEXT: .LBB16_108:
; RV32I-NEXT: lw a1, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s5, a1, a5
@@ -7140,13 +7178,13 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: or t2, a1, s8
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: beqz a4, .LBB16_118
-; RV32I-NEXT: .LBB16_117:
+; RV32I-NEXT: beqz a4, .LBB16_110
+; RV32I-NEXT: .LBB16_109:
; RV32I-NEXT: mv s2, s5
; RV32I-NEXT: mv s1, t6
; RV32I-NEXT: mv s6, t1
; RV32I-NEXT: mv a0, t2
-; RV32I-NEXT: .LBB16_118:
+; RV32I-NEXT: .LBB16_110:
; RV32I-NEXT: srli a4, ra, 16
; RV32I-NEXT: lui t2, 16
; RV32I-NEXT: srli t1, ra, 24
@@ -7674,112 +7712,115 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; RV32I-NEXT: # %bb.19:
; RV32I-NEXT: li s2, 0
; RV32I-NEXT: sll a1, t3, s7
-; RV32I-NEXT: j .LBB17_21
+; RV32I-NEXT: mv s4, a5
+; RV32I-NEXT: bnez s7, .LBB17_21
+; RV32I-NEXT: j .LBB17_22
; RV32I-NEXT: .LBB17_20:
; RV32I-NEXT: sll s2, t3, a4
; RV32I-NEXT: srl a1, t3, a1
; RV32I-NEXT: or a1, a1, s5
-; RV32I-NEXT: .LBB17_21:
; RV32I-NEXT: mv s4, a5
-; RV32I-NEXT: beqz s7, .LBB17_23
-; RV32I-NEXT: # %bb.22:
+; RV32I-NEXT: beqz s7, .LBB17_22
+; RV32I-NEXT: .LBB17_21:
; RV32I-NEXT: mv s4, a1
-; RV32I-NEXT: .LBB17_23:
+; RV32I-NEXT: .LBB17_22:
; RV32I-NEXT: li a1, 128
-; RV32I-NEXT: bltu a4, s9, .LBB17_25
-; RV32I-NEXT: # %bb.24:
+; RV32I-NEXT: bltu a4, s9, .LBB17_24
+; RV32I-NEXT: # %bb.23:
; RV32I-NEXT: sw zero, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: li a3, 0
-; RV32I-NEXT: j .LBB17_26
-; RV32I-NEXT: .LBB17_25:
+; RV32I-NEXT: j .LBB17_25
+; RV32I-NEXT: .LBB17_24:
; RV32I-NEXT: sw s8, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: or s2, a6, s1
; RV32I-NEXT: or s4, a7, s3
-; RV32I-NEXT: .LBB17_26:
+; RV32I-NEXT: .LBB17_25:
; RV32I-NEXT: sub ra, a1, a4
; RV32I-NEXT: mv a7, t1
; RV32I-NEXT: mv a6, t2
-; RV32I-NEXT: beqz a4, .LBB17_28
-; RV32I-NEXT: # %bb.27:
+; RV32I-NEXT: beqz a4, .LBB17_27
+; RV32I-NEXT: # %bb.26:
; RV32I-NEXT: mv a7, s2
; RV32I-NEXT: mv a6, s4
-; RV32I-NEXT: .LBB17_28:
+; RV32I-NEXT: .LBB17_27:
; RV32I-NEXT: neg s1, ra
; RV32I-NEXT: sll s2, t2, s1
-; RV32I-NEXT: bltu ra, t4, .LBB17_30
-; RV32I-NEXT: # %bb.29:
+; RV32I-NEXT: bltu ra, t4, .LBB17_29
+; RV32I-NEXT: # %bb.28:
; RV32I-NEXT: srl a1, t2, ra
+; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: bnez ra, .LBB17_30
; RV32I-NEXT: j .LBB17_31
-; RV32I-NEXT: .LBB17_30:
+; RV32I-NEXT: .LBB17_29:
; RV32I-NEXT: or a1, s0, s2
-; RV32I-NEXT: .LBB17_31:
; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: beqz ra, .LBB17_33
-; RV32I-NEXT: # %bb.32:
+; RV32I-NEXT: beqz ra, .LBB17_31
+; RV32I-NEXT: .LBB17_30:
; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .LBB17_33:
-; RV32I-NEXT: bltu ra, t4, .LBB17_35
-; RV32I-NEXT: # %bb.34:
+; RV32I-NEXT: .LBB17_31:
+; RV32I-NEXT: bltu ra, t4, .LBB17_33
+; RV32I-NEXT: # %bb.32:
; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT: srl a1, a5, ra
-; RV32I-NEXT: j .LBB17_36
-; RV32I-NEXT: .LBB17_35:
+; RV32I-NEXT: mv t5, t3
+; RV32I-NEXT: bnez ra, .LBB17_34
+; RV32I-NEXT: j .LBB17_35
+; RV32I-NEXT: .LBB17_33:
; RV32I-NEXT: srl a1, t2, s10
; RV32I-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT: sll a1, a5, s1
; RV32I-NEXT: or a1, t5, a1
-; RV32I-NEXT: .LBB17_36:
; RV32I-NEXT: mv t5, t3
-; RV32I-NEXT: beqz ra, .LBB17_38
-; RV32I-NEXT: # %bb.37:
+; RV32I-NEXT: beqz ra, .LBB17_35
+; RV32I-NEXT: .LBB17_34:
; RV32I-NEXT: mv t5, a1
-; RV32I-NEXT: .LBB17_38:
+; RV32I-NEXT: .LBB17_35:
; RV32I-NEXT: sub s3, s9, ra
-; RV32I-NEXT: bltu ra, t4, .LBB17_41
-; RV32I-NEXT: # %bb.39:
+; RV32I-NEXT: bltu ra, t4, .LBB17_38
+; RV32I-NEXT: # %bb.36:
; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bgeu s3, t4, .LBB17_42
-; RV32I-NEXT: .LBB17_40:
+; RV32I-NEXT: bgeu s3, t4, .LBB17_39
+; RV32I-NEXT: .LBB17_37:
; RV32I-NEXT: sll s1, t1, s1
; RV32I-NEXT: neg a1, s3
; RV32I-NEXT: srl a1, t1, a1
; RV32I-NEXT: or a1, a1, s2
-; RV32I-NEXT: j .LBB17_43
-; RV32I-NEXT: .LBB17_41:
+; RV32I-NEXT: j .LBB17_40
+; RV32I-NEXT: .LBB17_38:
; RV32I-NEXT: srl a1, a5, s10
; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s3, t4, .LBB17_40
-; RV32I-NEXT: .LBB17_42:
+; RV32I-NEXT: bltu s3, t4, .LBB17_37
+; RV32I-NEXT: .LBB17_39:
; RV32I-NEXT: li s1, 0
; RV32I-NEXT: sll a1, t1, s3
-; RV32I-NEXT: .LBB17_43:
+; RV32I-NEXT: .LBB17_40:
; RV32I-NEXT: addi s4, ra, -64
; RV32I-NEXT: mv s2, t2
-; RV32I-NEXT: beqz s3, .LBB17_45
-; RV32I-NEXT: # %bb.44:
+; RV32I-NEXT: beqz s3, .LBB17_42
+; RV32I-NEXT: # %bb.41:
; RV32I-NEXT: mv s2, a1
-; RV32I-NEXT: .LBB17_45:
+; RV32I-NEXT: .LBB17_42:
; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s5, a7
-; RV32I-NEXT: bltu s4, t4, .LBB17_47
-; RV32I-NEXT: # %bb.46:
+; RV32I-NEXT: bltu s4, t4, .LBB17_44
+; RV32I-NEXT: # %bb.43:
; RV32I-NEXT: srl t0, t2, s4
-; RV32I-NEXT: j .LBB17_48
-; RV32I-NEXT: .LBB17_47:
+; RV32I-NEXT: j .LBB17_45
+; RV32I-NEXT: .LBB17_44:
; RV32I-NEXT: srl a1, t1, ra
; RV32I-NEXT: neg t0, s4
; RV32I-NEXT: sll t0, t2, t0
; RV32I-NEXT: or t0, a1, t0
-; RV32I-NEXT: .LBB17_48:
+; RV32I-NEXT: .LBB17_45:
; RV32I-NEXT: mv s0, s10
; RV32I-NEXT: mv a7, a6
; RV32I-NEXT: lbu s8, 19(a0)
; RV32I-NEXT: lbu a1, 23(a0)
; RV32I-NEXT: mv s3, t1
-; RV32I-NEXT: beqz s4, .LBB17_50
-; RV32I-NEXT: # %bb.49:
+; RV32I-NEXT: beqz s4, .LBB17_47
+; RV32I-NEXT: # %bb.46:
; RV32I-NEXT: mv s3, t0
-; RV32I-NEXT: .LBB17_50:
+; RV32I-NEXT: .LBB17_47:
; RV32I-NEXT: mv a6, a3
; RV32I-NEXT: lbu s10, 17(a0)
; RV32I-NEXT: lbu t0, 18(a0)
@@ -7788,25 +7829,25 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; RV32I-NEXT: slli s8, s8, 8
; RV32I-NEXT: slli a1, a1, 8
; RV32I-NEXT: li a3, 64
-; RV32I-NEXT: bltu s4, t4, .LBB17_52
-; RV32I-NEXT: # %bb.51:
+; RV32I-NEXT: bltu s4, t4, .LBB17_49
+; RV32I-NEXT: # %bb.48:
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: j .LBB17_53
-; RV32I-NEXT: .LBB17_52:
+; RV32I-NEXT: j .LBB17_50
+; RV32I-NEXT: .LBB17_49:
; RV32I-NEXT: srl s4, t2, ra
-; RV32I-NEXT: .LBB17_53:
+; RV32I-NEXT: .LBB17_50:
; RV32I-NEXT: or s11, s8, t0
; RV32I-NEXT: lbu t0, 16(a0)
; RV32I-NEXT: lbu s8, 20(a0)
; RV32I-NEXT: slli s10, s10, 8
; RV32I-NEXT: slli s9, s9, 8
; RV32I-NEXT: or t6, a1, t6
-; RV32I-NEXT: bgeu ra, a3, .LBB17_55
-; RV32I-NEXT: # %bb.54:
+; RV32I-NEXT: bgeu ra, a3, .LBB17_52
+; RV32I-NEXT: # %bb.51:
; RV32I-NEXT: or s3, t5, s1
; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s4, a1, s2
-; RV32I-NEXT: .LBB17_55:
+; RV32I-NEXT: .LBB17_52:
; RV32I-NEXT: or a1, s10, t0
; RV32I-NEXT: slli s11, s11, 16
; RV32I-NEXT: or t0, s9, s8
@@ -7814,58 +7855,58 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; RV32I-NEXT: mv t5, t3
; RV32I-NEXT: mv s1, a5
; RV32I-NEXT: mv a3, a6
-; RV32I-NEXT: beqz ra, .LBB17_57
-; RV32I-NEXT: # %bb.56:
+; RV32I-NEXT: beqz ra, .LBB17_54
+; RV32I-NEXT: # %bb.53:
; RV32I-NEXT: mv t5, s3
; RV32I-NEXT: mv s1, s4
-; RV32I-NEXT: .LBB17_57:
+; RV32I-NEXT: .LBB17_54:
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: or s2, s11, a1
; RV32I-NEXT: or s1, t6, t0
; RV32I-NEXT: li a1, 64
; RV32I-NEXT: mv a6, a7
; RV32I-NEXT: mv a7, s0
-; RV32I-NEXT: bltu ra, a1, .LBB17_59
-; RV32I-NEXT: # %bb.58:
+; RV32I-NEXT: bltu ra, a1, .LBB17_56
+; RV32I-NEXT: # %bb.55:
; RV32I-NEXT: sw zero, 40(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: .LBB17_59:
+; RV32I-NEXT: .LBB17_56:
; RV32I-NEXT: srl s3, s2, a7
; RV32I-NEXT: sll ra, s1, a4
; RV32I-NEXT: mv a7, s5
; RV32I-NEXT: sw t5, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu a4, t4, .LBB17_61
-; RV32I-NEXT: # %bb.60:
+; RV32I-NEXT: bltu a4, t4, .LBB17_58
+; RV32I-NEXT: # %bb.57:
; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: sll a1, s2, a4
-; RV32I-NEXT: j .LBB17_62
-; RV32I-NEXT: .LBB17_61:
+; RV32I-NEXT: j .LBB17_59
+; RV32I-NEXT: .LBB17_58:
; RV32I-NEXT: sll a1, s2, a4
; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: or a1, s3, ra
-; RV32I-NEXT: .LBB17_62:
+; RV32I-NEXT: .LBB17_59:
; RV32I-NEXT: lbu s9, 27(a0)
; RV32I-NEXT: lbu t6, 31(a0)
; RV32I-NEXT: mv t5, s1
-; RV32I-NEXT: beqz a4, .LBB17_64
-; RV32I-NEXT: # %bb.63:
+; RV32I-NEXT: beqz a4, .LBB17_61
+; RV32I-NEXT: # %bb.60:
; RV32I-NEXT: mv t5, a1
-; RV32I-NEXT: .LBB17_64:
+; RV32I-NEXT: .LBB17_61:
; RV32I-NEXT: lbu s8, 25(a0)
; RV32I-NEXT: lbu s4, 26(a0)
; RV32I-NEXT: lbu s11, 29(a0)
; RV32I-NEXT: lbu s10, 30(a0)
; RV32I-NEXT: slli s9, s9, 8
; RV32I-NEXT: slli t6, t6, 8
-; RV32I-NEXT: bltu s6, t4, .LBB17_66
-; RV32I-NEXT: # %bb.65:
+; RV32I-NEXT: bltu s6, t4, .LBB17_63
+; RV32I-NEXT: # %bb.62:
; RV32I-NEXT: srl t0, s1, s6
-; RV32I-NEXT: j .LBB17_67
-; RV32I-NEXT: .LBB17_66:
+; RV32I-NEXT: j .LBB17_64
+; RV32I-NEXT: .LBB17_63:
; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: sll a1, s1, a1
; RV32I-NEXT: or t0, s3, a1
-; RV32I-NEXT: .LBB17_67:
+; RV32I-NEXT: .LBB17_64:
; RV32I-NEXT: slli s8, s8, 8
; RV32I-NEXT: lbu s3, 24(a0)
; RV32I-NEXT: lbu a1, 28(a0)
@@ -7873,170 +7914,174 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; RV32I-NEXT: slli s11, s11, 8
; RV32I-NEXT: or t6, t6, s10
; RV32I-NEXT: mv s9, s2
-; RV32I-NEXT: beqz s6, .LBB17_69
-; RV32I-NEXT: # %bb.68:
+; RV32I-NEXT: beqz s6, .LBB17_66
+; RV32I-NEXT: # %bb.65:
; RV32I-NEXT: mv s9, t0
-; RV32I-NEXT: .LBB17_69:
+; RV32I-NEXT: .LBB17_66:
; RV32I-NEXT: or a0, s8, s3
; RV32I-NEXT: slli t0, s4, 16
; RV32I-NEXT: or a1, s11, a1
; RV32I-NEXT: slli t6, t6, 16
-; RV32I-NEXT: bltu s6, t4, .LBB17_71
-; RV32I-NEXT: # %bb.70:
+; RV32I-NEXT: bltu s6, t4, .LBB17_68
+; RV32I-NEXT: # %bb.67:
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: j .LBB17_72
-; RV32I-NEXT: .LBB17_71:
+; RV32I-NEXT: j .LBB17_69
+; RV32I-NEXT: .LBB17_68:
; RV32I-NEXT: srl s4, s1, s0
-; RV32I-NEXT: .LBB17_72:
+; RV32I-NEXT: .LBB17_69:
; RV32I-NEXT: li s11, 64
; RV32I-NEXT: or s6, t0, a0
; RV32I-NEXT: or a0, t6, a1
-; RV32I-NEXT: bltu a4, t4, .LBB17_74
-; RV32I-NEXT: # %bb.73:
+; RV32I-NEXT: bltu a4, t4, .LBB17_71
+; RV32I-NEXT: # %bb.70:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a1, s6, a4
-; RV32I-NEXT: j .LBB17_75
-; RV32I-NEXT: .LBB17_74:
+; RV32I-NEXT: mv s10, a0
+; RV32I-NEXT: bnez a4, .LBB17_72
+; RV32I-NEXT: j .LBB17_73
+; RV32I-NEXT: .LBB17_71:
; RV32I-NEXT: sll s3, s6, a4
; RV32I-NEXT: srl a1, s6, s0
; RV32I-NEXT: sll t0, a0, a4
; RV32I-NEXT: or a1, a1, t0
-; RV32I-NEXT: .LBB17_75:
; RV32I-NEXT: mv s10, a0
-; RV32I-NEXT: beqz a4, .LBB17_77
-; RV32I-NEXT: # %bb.76:
+; RV32I-NEXT: beqz a4, .LBB17_73
+; RV32I-NEXT: .LBB17_72:
; RV32I-NEXT: mv s10, a1
-; RV32I-NEXT: .LBB17_77:
-; RV32I-NEXT: bltu s7, t4, .LBB17_79
-; RV32I-NEXT: # %bb.78:
+; RV32I-NEXT: .LBB17_73:
+; RV32I-NEXT: bltu s7, t4, .LBB17_75
+; RV32I-NEXT: # %bb.74:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: sll a1, s2, s7
-; RV32I-NEXT: j .LBB17_80
-; RV32I-NEXT: .LBB17_79:
+; RV32I-NEXT: mv s0, s1
+; RV32I-NEXT: bnez s7, .LBB17_76
+; RV32I-NEXT: j .LBB17_77
+; RV32I-NEXT: .LBB17_75:
; RV32I-NEXT: sll s5, s2, a4
; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: srl a1, s2, a1
; RV32I-NEXT: or a1, a1, ra
-; RV32I-NEXT: .LBB17_80:
; RV32I-NEXT: mv s0, s1
-; RV32I-NEXT: beqz s7, .LBB17_82
-; RV32I-NEXT: # %bb.81:
+; RV32I-NEXT: beqz s7, .LBB17_77
+; RV32I-NEXT: .LBB17_76:
; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: .LBB17_82:
-; RV32I-NEXT: bltu a4, s11, .LBB17_84
-; RV32I-NEXT: # %bb.83:
+; RV32I-NEXT: .LBB17_77:
+; RV32I-NEXT: bltu a4, s11, .LBB17_79
+; RV32I-NEXT: # %bb.78:
; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: li t5, 0
-; RV32I-NEXT: j .LBB17_85
-; RV32I-NEXT: .LBB17_84:
+; RV32I-NEXT: j .LBB17_80
+; RV32I-NEXT: .LBB17_79:
; RV32I-NEXT: or s5, s9, s3
; RV32I-NEXT: or s0, s4, s10
-; RV32I-NEXT: .LBB17_85:
+; RV32I-NEXT: .LBB17_80:
; RV32I-NEXT: addi s9, a4, -128
; RV32I-NEXT: mv s7, s6
; RV32I-NEXT: mv s8, a0
-; RV32I-NEXT: beqz a4, .LBB17_87
-; RV32I-NEXT: # %bb.86:
+; RV32I-NEXT: beqz a4, .LBB17_82
+; RV32I-NEXT: # %bb.81:
; RV32I-NEXT: mv s7, s5
; RV32I-NEXT: mv s8, s0
-; RV32I-NEXT: .LBB17_87:
+; RV32I-NEXT: .LBB17_82:
; RV32I-NEXT: neg s3, s9
; RV32I-NEXT: srl s0, t3, s3
-; RV32I-NEXT: bltu s9, t4, .LBB17_89
-; RV32I-NEXT: # %bb.88:
+; RV32I-NEXT: bltu s9, t4, .LBB17_84
+; RV32I-NEXT: # %bb.83:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: sll a1, t3, s9
-; RV32I-NEXT: j .LBB17_90
-; RV32I-NEXT: .LBB17_89:
+; RV32I-NEXT: j .LBB17_85
+; RV32I-NEXT: .LBB17_84:
; RV32I-NEXT: sll s5, t3, a4
; RV32I-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a1, s0, a1
-; RV32I-NEXT: .LBB17_90:
+; RV32I-NEXT: .LBB17_85:
; RV32I-NEXT: sub s4, s11, s9
; RV32I-NEXT: mv t6, a5
-; RV32I-NEXT: beqz s9, .LBB17_92
-; RV32I-NEXT: # %bb.91:
+; RV32I-NEXT: beqz s9, .LBB17_87
+; RV32I-NEXT: # %bb.86:
; RV32I-NEXT: mv t6, a1
-; RV32I-NEXT: .LBB17_92:
-; RV32I-NEXT: bltu s4, t4, .LBB17_94
-; RV32I-NEXT: # %bb.93:
+; RV32I-NEXT: .LBB17_87:
+; RV32I-NEXT: bltu s4, t4, .LBB17_89
+; RV32I-NEXT: # %bb.88:
; RV32I-NEXT: srl a1, a5, s4
-; RV32I-NEXT: j .LBB17_95
-; RV32I-NEXT: .LBB17_94:
+; RV32I-NEXT: mv s0, t3
+; RV32I-NEXT: bnez s4, .LBB17_90
+; RV32I-NEXT: j .LBB17_91
+; RV32I-NEXT: .LBB17_89:
; RV32I-NEXT: neg a1, s4
; RV32I-NEXT: sll a1, a5, a1
; RV32I-NEXT: or a1, s0, a1
-; RV32I-NEXT: .LBB17_95:
; RV32I-NEXT: mv s0, t3
-; RV32I-NEXT: beqz s4, .LBB17_97
-; RV32I-NEXT: # %bb.96:
+; RV32I-NEXT: beqz s4, .LBB17_91
+; RV32I-NEXT: .LBB17_90:
; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: .LBB17_97:
-; RV32I-NEXT: bltu s4, t4, .LBB17_101
-; RV32I-NEXT: # %bb.98:
+; RV32I-NEXT: .LBB17_91:
+; RV32I-NEXT: bltu s4, t4, .LBB17_94
+; RV32I-NEXT: # %bb.92:
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: .LBB17_99:
; RV32I-NEXT: li ra, 64
-; RV32I-NEXT: bgeu s9, t4, .LBB17_102
-; RV32I-NEXT: # %bb.100:
+; RV32I-NEXT: bgeu s9, t4, .LBB17_95
+; RV32I-NEXT: .LBB17_93:
; RV32I-NEXT: sll s10, t1, a4
; RV32I-NEXT: srl a1, t1, s3
; RV32I-NEXT: lw t0, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a1, a1, t0
-; RV32I-NEXT: j .LBB17_103
-; RV32I-NEXT: .LBB17_101:
+; RV32I-NEXT: j .LBB17_96
+; RV32I-NEXT: .LBB17_94:
; RV32I-NEXT: srl s4, a5, s3
-; RV32I-NEXT: j .LBB17_99
-; RV32I-NEXT: .LBB17_102:
+; RV32I-NEXT: li ra, 64
+; RV32I-NEXT: bltu s9, t4, .LBB17_93
+; RV32I-NEXT: .LBB17_95:
; RV32I-NEXT: li s10, 0
; RV32I-NEXT: sll a1, t1, s9
-; RV32I-NEXT: .LBB17_103:
+; RV32I-NEXT: .LBB17_96:
; RV32I-NEXT: addi s11, s9, -64
; RV32I-NEXT: mv s3, t2
-; RV32I-NEXT: beqz s9, .LBB17_105
-; RV32I-NEXT: # %bb.104:
+; RV32I-NEXT: beqz s9, .LBB17_98
+; RV32I-NEXT: # %bb.97:
; RV32I-NEXT: mv s3, a1
-; RV32I-NEXT: .LBB17_105:
-; RV32I-NEXT: bltu s11, t4, .LBB17_107
-; RV32I-NEXT: # %bb.106:
+; RV32I-NEXT: .LBB17_98:
+; RV32I-NEXT: bltu s11, t4, .LBB17_100
+; RV32I-NEXT: # %bb.99:
; RV32I-NEXT: li t4, 0
; RV32I-NEXT: sll a1, t3, s11
-; RV32I-NEXT: bnez s11, .LBB17_108
-; RV32I-NEXT: j .LBB17_109
-; RV32I-NEXT: .LBB17_107:
+; RV32I-NEXT: bnez s11, .LBB17_101
+; RV32I-NEXT: j .LBB17_102
+; RV32I-NEXT: .LBB17_100:
; RV32I-NEXT: sll t4, t3, s9
; RV32I-NEXT: neg a1, s11
; RV32I-NEXT: srl a1, t3, a1
; RV32I-NEXT: sll t0, a5, s9
; RV32I-NEXT: or a1, a1, t0
-; RV32I-NEXT: beqz s11, .LBB17_109
-; RV32I-NEXT: .LBB17_108:
+; RV32I-NEXT: beqz s11, .LBB17_102
+; RV32I-NEXT: .LBB17_101:
; RV32I-NEXT: mv a5, a1
-; RV32I-NEXT: .LBB17_109:
-; RV32I-NEXT: bltu s9, ra, .LBB17_111
-; RV32I-NEXT: # %bb.110:
+; RV32I-NEXT: .LBB17_102:
+; RV32I-NEXT: bltu s9, ra, .LBB17_104
+; RV32I-NEXT: # %bb.103:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: li t6, 0
-; RV32I-NEXT: j .LBB17_112
-; RV32I-NEXT: .LBB17_111:
+; RV32I-NEXT: li a1, 128
+; RV32I-NEXT: bnez s9, .LBB17_105
+; RV32I-NEXT: j .LBB17_106
+; RV32I-NEXT: .LBB17_104:
; RV32I-NEXT: or t4, s0, s10
; RV32I-NEXT: or a5, s4, s3
-; RV32I-NEXT: .LBB17_112:
; RV32I-NEXT: li a1, 128
-; RV32I-NEXT: beqz s9, .LBB17_114
-; RV32I-NEXT: # %bb.113:
+; RV32I-NEXT: beqz s9, .LBB17_106
+; RV32I-NEXT: .LBB17_105:
; RV32I-NEXT: mv t1, t4
; RV32I-NEXT: mv t2, a5
-; RV32I-NEXT: .LBB17_114:
-; RV32I-NEXT: bltu a4, a1, .LBB17_116
-; RV32I-NEXT: # %bb.115:
+; RV32I-NEXT: .LBB17_106:
+; RV32I-NEXT: bltu a4, a1, .LBB17_108
+; RV32I-NEXT: # %bb.107:
; RV32I-NEXT: li ra, 0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: li a7, 0
; RV32I-NEXT: li a6, 0
-; RV32I-NEXT: bnez a4, .LBB17_117
-; RV32I-NEXT: j .LBB17_118
-; RV32I-NEXT: .LBB17_116:
+; RV32I-NEXT: bnez a4, .LBB17_109
+; RV32I-NEXT: j .LBB17_110
+; RV32I-NEXT: .LBB17_108:
; RV32I-NEXT: lw a1, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s5, a1, a5
@@ -8047,13 +8092,13 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: or t2, a1, s8
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: beqz a4, .LBB17_118
-; RV32I-NEXT: .LBB17_117:
+; RV32I-NEXT: beqz a4, .LBB17_110
+; RV32I-NEXT: .LBB17_109:
; RV32I-NEXT: mv s2, s5
; RV32I-NEXT: mv s1, t6
; RV32I-NEXT: mv s6, t1
; RV32I-NEXT: mv a0, t2
-; RV32I-NEXT: .LBB17_118:
+; RV32I-NEXT: .LBB17_110:
; RV32I-NEXT: srli a4, ra, 16
; RV32I-NEXT: lui t2, 16
; RV32I-NEXT: srli t1, ra, 24
@@ -8681,268 +8726,276 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: # %bb.38:
; RV32I-NEXT: li s4, 0
; RV32I-NEXT: srl a3, a0, a5
-; RV32I-NEXT: j .LBB18_40
+; RV32I-NEXT: mv a6, s0
+; RV32I-NEXT: bnez a5, .LBB18_40
+; RV32I-NEXT: j .LBB18_41
; RV32I-NEXT: .LBB18_39:
; RV32I-NEXT: srl s4, t3, a5
; RV32I-NEXT: srl a3, s0, a5
; RV32I-NEXT: sll a6, a0, s6
; RV32I-NEXT: or a3, a3, a6
-; RV32I-NEXT: .LBB18_40:
; RV32I-NEXT: mv a6, s0
-; RV32I-NEXT: beqz a5, .LBB18_42
-; RV32I-NEXT: # %bb.41:
+; RV32I-NEXT: beqz a5, .LBB18_41
+; RV32I-NEXT: .LBB18_40:
; RV32I-NEXT: mv a6, a3
-; RV32I-NEXT: .LBB18_42:
-; RV32I-NEXT: bltu a5, t5, .LBB18_45
-; RV32I-NEXT: # %bb.43:
+; RV32I-NEXT: .LBB18_41:
+; RV32I-NEXT: bltu a5, t5, .LBB18_44
+; RV32I-NEXT: # %bb.42:
; RV32I-NEXT: li s1, 0
-; RV32I-NEXT: bgeu s7, t5, .LBB18_46
-; RV32I-NEXT: .LBB18_44:
+; RV32I-NEXT: bgeu s7, t5, .LBB18_45
+; RV32I-NEXT: .LBB18_43:
; RV32I-NEXT: sll s3, t4, s6
; RV32I-NEXT: srl a3, t4, s10
; RV32I-NEXT: or a3, a3, ra
+; RV32I-NEXT: mv s10, t3
+; RV32I-NEXT: bnez s7, .LBB18_46
; RV32I-NEXT: j .LBB18_47
-; RV32I-NEXT: .LBB18_45:
+; RV32I-NEXT: .LBB18_44:
; RV32I-NEXT: srl s1, a0, a5
-; RV32I-NEXT: bltu s7, t5, .LBB18_44
-; RV32I-NEXT: .LBB18_46:
+; RV32I-NEXT: bltu s7, t5, .LBB18_43
+; RV32I-NEXT: .LBB18_45:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t4, s7
-; RV32I-NEXT: .LBB18_47:
; RV32I-NEXT: mv s10, t3
-; RV32I-NEXT: beqz s7, .LBB18_49
-; RV32I-NEXT: # %bb.48:
+; RV32I-NEXT: beqz s7, .LBB18_47
+; RV32I-NEXT: .LBB18_46:
; RV32I-NEXT: mv s10, a3
-; RV32I-NEXT: .LBB18_49:
-; RV32I-NEXT: bltu s9, t5, .LBB18_51
-; RV32I-NEXT: # %bb.50:
+; RV32I-NEXT: .LBB18_47:
+; RV32I-NEXT: bltu s9, t5, .LBB18_49
+; RV32I-NEXT: # %bb.48:
; RV32I-NEXT: srl a3, t3, s9
-; RV32I-NEXT: j .LBB18_52
-; RV32I-NEXT: .LBB18_51:
+; RV32I-NEXT: mv s2, t4
+; RV32I-NEXT: bnez s9, .LBB18_50
+; RV32I-NEXT: j .LBB18_51
+; RV32I-NEXT: .LBB18_49:
; RV32I-NEXT: sll a3, t3, s11
; RV32I-NEXT: or a3, s2, a3
-; RV32I-NEXT: .LBB18_52:
; RV32I-NEXT: mv s2, t4
-; RV32I-NEXT: beqz s9, .LBB18_54
-; RV32I-NEXT: # %bb.53:
+; RV32I-NEXT: beqz s9, .LBB18_51
+; RV32I-NEXT: .LBB18_50:
; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: .LBB18_54:
-; RV32I-NEXT: bltu s9, t5, .LBB18_56
-; RV32I-NEXT: # %bb.55:
+; RV32I-NEXT: .LBB18_51:
+; RV32I-NEXT: bltu s9, t5, .LBB18_53
+; RV32I-NEXT: # %bb.52:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: bltu a5, t6, .LBB18_57
-; RV32I-NEXT: j .LBB18_58
-; RV32I-NEXT: .LBB18_56:
+; RV32I-NEXT: bltu a5, t6, .LBB18_54
+; RV32I-NEXT: j .LBB18_55
+; RV32I-NEXT: .LBB18_53:
; RV32I-NEXT: srl s7, t3, a5
-; RV32I-NEXT: bgeu a5, t6, .LBB18_58
-; RV32I-NEXT: .LBB18_57:
+; RV32I-NEXT: bgeu a5, t6, .LBB18_55
+; RV32I-NEXT: .LBB18_54:
; RV32I-NEXT: or s2, a6, s3
; RV32I-NEXT: or s7, s1, s10
-; RV32I-NEXT: .LBB18_58:
+; RV32I-NEXT: .LBB18_55:
; RV32I-NEXT: li a3, 128
; RV32I-NEXT: mv a6, s0
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: beqz a5, .LBB18_60
-; RV32I-NEXT: # %bb.59:
+; RV32I-NEXT: beqz a5, .LBB18_57
+; RV32I-NEXT: # %bb.56:
; RV32I-NEXT: mv a6, s2
; RV32I-NEXT: mv s1, s7
-; RV32I-NEXT: .LBB18_60:
+; RV32I-NEXT: .LBB18_57:
; RV32I-NEXT: sw a6, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sub s2, a3, a5
-; RV32I-NEXT: bltu a5, t6, .LBB18_62
-; RV32I-NEXT: # %bb.61:
+; RV32I-NEXT: bltu a5, t6, .LBB18_59
+; RV32I-NEXT: # %bb.58:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: .LBB18_62:
+; RV32I-NEXT: .LBB18_59:
; RV32I-NEXT: neg s3, s2
; RV32I-NEXT: srl a6, t1, s3
; RV32I-NEXT: sw s4, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s2, t5, .LBB18_64
-; RV32I-NEXT: # %bb.63:
+; RV32I-NEXT: bltu s2, t5, .LBB18_61
+; RV32I-NEXT: # %bb.60:
; RV32I-NEXT: li s11, 0
; RV32I-NEXT: sll a3, t1, s2
-; RV32I-NEXT: j .LBB18_65
-; RV32I-NEXT: .LBB18_64:
+; RV32I-NEXT: j .LBB18_62
+; RV32I-NEXT: .LBB18_61:
; RV32I-NEXT: sll s11, t1, s6
; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a6, a3
-; RV32I-NEXT: .LBB18_65:
+; RV32I-NEXT: .LBB18_62:
; RV32I-NEXT: sub s1, t6, s2
; RV32I-NEXT: mv s8, a7
-; RV32I-NEXT: beqz s2, .LBB18_67
-; RV32I-NEXT: # %bb.66:
+; RV32I-NEXT: beqz s2, .LBB18_64
+; RV32I-NEXT: # %bb.63:
; RV32I-NEXT: mv s8, a3
-; RV32I-NEXT: .LBB18_67:
-; RV32I-NEXT: bltu s1, t5, .LBB18_69
-; RV32I-NEXT: # %bb.68:
+; RV32I-NEXT: .LBB18_64:
+; RV32I-NEXT: bltu s1, t5, .LBB18_66
+; RV32I-NEXT: # %bb.65:
; RV32I-NEXT: srl a3, a7, s1
-; RV32I-NEXT: j .LBB18_70
-; RV32I-NEXT: .LBB18_69:
+; RV32I-NEXT: mv a6, t1
+; RV32I-NEXT: bnez s1, .LBB18_67
+; RV32I-NEXT: j .LBB18_68
+; RV32I-NEXT: .LBB18_66:
; RV32I-NEXT: neg a3, s1
; RV32I-NEXT: sll a3, a7, a3
; RV32I-NEXT: or a3, a6, a3
-; RV32I-NEXT: .LBB18_70:
; RV32I-NEXT: mv a6, t1
-; RV32I-NEXT: beqz s1, .LBB18_72
-; RV32I-NEXT: # %bb.71:
+; RV32I-NEXT: beqz s1, .LBB18_68
+; RV32I-NEXT: .LBB18_67:
; RV32I-NEXT: mv a6, a3
-; RV32I-NEXT: .LBB18_72:
-; RV32I-NEXT: bltu s1, t5, .LBB18_76
-; RV32I-NEXT: # %bb.73:
+; RV32I-NEXT: .LBB18_68:
+; RV32I-NEXT: bltu s1, t5, .LBB18_71
+; RV32I-NEXT: # %bb.69:
; RV32I-NEXT: li s1, 0
-; RV32I-NEXT: .LBB18_74:
; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bgeu s2, t5, .LBB18_77
-; RV32I-NEXT: # %bb.75:
+; RV32I-NEXT: bgeu s2, t5, .LBB18_72
+; RV32I-NEXT: .LBB18_70:
; RV32I-NEXT: sll s6, t2, s6
; RV32I-NEXT: srl a3, t2, s3
; RV32I-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: j .LBB18_78
-; RV32I-NEXT: .LBB18_76:
+; RV32I-NEXT: j .LBB18_73
+; RV32I-NEXT: .LBB18_71:
; RV32I-NEXT: srl s1, a7, s3
-; RV32I-NEXT: j .LBB18_74
-; RV32I-NEXT: .LBB18_77:
+; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: bltu s2, t5, .LBB18_70
+; RV32I-NEXT: .LBB18_72:
; RV32I-NEXT: li s6, 0
; RV32I-NEXT: sll a3, t2, s2
-; RV32I-NEXT: .LBB18_78:
+; RV32I-NEXT: .LBB18_73:
; RV32I-NEXT: addi s9, s2, -64
; RV32I-NEXT: mv s5, a4
-; RV32I-NEXT: beqz s2, .LBB18_80
-; RV32I-NEXT: # %bb.79:
+; RV32I-NEXT: beqz s2, .LBB18_75
+; RV32I-NEXT: # %bb.74:
; RV32I-NEXT: mv s5, a3
-; RV32I-NEXT: .LBB18_80:
-; RV32I-NEXT: bltu s9, t5, .LBB18_82
-; RV32I-NEXT: # %bb.81:
+; RV32I-NEXT: .LBB18_75:
+; RV32I-NEXT: bltu s9, t5, .LBB18_77
+; RV32I-NEXT: # %bb.76:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t1, s9
-; RV32I-NEXT: j .LBB18_83
-; RV32I-NEXT: .LBB18_82:
+; RV32I-NEXT: mv s7, a7
+; RV32I-NEXT: bnez s9, .LBB18_78
+; RV32I-NEXT: j .LBB18_79
+; RV32I-NEXT: .LBB18_77:
; RV32I-NEXT: sll s3, t1, s2
; RV32I-NEXT: neg a3, s9
; RV32I-NEXT: srl a3, t1, a3
; RV32I-NEXT: sll s4, a7, s2
; RV32I-NEXT: or a3, a3, s4
-; RV32I-NEXT: .LBB18_83:
; RV32I-NEXT: mv s7, a7
-; RV32I-NEXT: beqz s9, .LBB18_85
-; RV32I-NEXT: # %bb.84:
+; RV32I-NEXT: beqz s9, .LBB18_79
+; RV32I-NEXT: .LBB18_78:
; RV32I-NEXT: mv s7, a3
-; RV32I-NEXT: .LBB18_85:
-; RV32I-NEXT: bltu s2, t6, .LBB18_87
-; RV32I-NEXT: # %bb.86:
+; RV32I-NEXT: .LBB18_79:
+; RV32I-NEXT: bltu s2, t6, .LBB18_81
+; RV32I-NEXT: # %bb.80:
; RV32I-NEXT: li s11, 0
; RV32I-NEXT: li s8, 0
-; RV32I-NEXT: j .LBB18_88
-; RV32I-NEXT: .LBB18_87:
+; RV32I-NEXT: j .LBB18_82
+; RV32I-NEXT: .LBB18_81:
; RV32I-NEXT: or s3, a6, s6
; RV32I-NEXT: or s7, s1, s5
-; RV32I-NEXT: .LBB18_88:
+; RV32I-NEXT: .LBB18_82:
; RV32I-NEXT: addi ra, a5, -128
; RV32I-NEXT: mv s4, t2
; RV32I-NEXT: mv s6, a4
-; RV32I-NEXT: beqz s2, .LBB18_90
-; RV32I-NEXT: # %bb.89:
+; RV32I-NEXT: beqz s2, .LBB18_84
+; RV32I-NEXT: # %bb.83:
; RV32I-NEXT: mv s4, s3
; RV32I-NEXT: mv s6, s7
-; RV32I-NEXT: .LBB18_90:
+; RV32I-NEXT: .LBB18_84:
; RV32I-NEXT: neg s9, ra
; RV32I-NEXT: sll s3, a4, s9
-; RV32I-NEXT: bltu ra, t5, .LBB18_92
-; RV32I-NEXT: # %bb.91:
+; RV32I-NEXT: bltu ra, t5, .LBB18_86
+; RV32I-NEXT: # %bb.85:
; RV32I-NEXT: sra a3, a4, ra
-; RV32I-NEXT: j .LBB18_93
-; RV32I-NEXT: .LBB18_92:
+; RV32I-NEXT: mv s1, t2
+; RV32I-NEXT: bnez ra, .LBB18_87
+; RV32I-NEXT: j .LBB18_88
+; RV32I-NEXT: .LBB18_86:
; RV32I-NEXT: lw a3, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: .LBB18_93:
; RV32I-NEXT: mv s1, t2
-; RV32I-NEXT: beqz ra, .LBB18_95
-; RV32I-NEXT: # %bb.94:
+; RV32I-NEXT: beqz ra, .LBB18_88
+; RV32I-NEXT: .LBB18_87:
; RV32I-NEXT: mv s1, a3
-; RV32I-NEXT: .LBB18_95:
-; RV32I-NEXT: bltu ra, t5, .LBB18_97
-; RV32I-NEXT: # %bb.96:
+; RV32I-NEXT: .LBB18_88:
+; RV32I-NEXT: bltu ra, t5, .LBB18_90
+; RV32I-NEXT: # %bb.89:
; RV32I-NEXT: srai s2, a4, 31
; RV32I-NEXT: srl a3, a7, ra
-; RV32I-NEXT: j .LBB18_98
-; RV32I-NEXT: .LBB18_97:
+; RV32I-NEXT: mv a6, t1
+; RV32I-NEXT: bnez ra, .LBB18_91
+; RV32I-NEXT: j .LBB18_92
+; RV32I-NEXT: .LBB18_90:
; RV32I-NEXT: sra s2, a4, a5
; RV32I-NEXT: sll a3, a7, s9
; RV32I-NEXT: lw a6, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a6, a3
-; RV32I-NEXT: .LBB18_98:
; RV32I-NEXT: mv a6, t1
-; RV32I-NEXT: beqz ra, .LBB18_100
-; RV32I-NEXT: # %bb.99:
+; RV32I-NEXT: beqz ra, .LBB18_92
+; RV32I-NEXT: .LBB18_91:
; RV32I-NEXT: mv a6, a3
-; RV32I-NEXT: .LBB18_100:
+; RV32I-NEXT: .LBB18_92:
; RV32I-NEXT: mv s5, t0
; RV32I-NEXT: sub s10, t6, ra
; RV32I-NEXT: li t0, 64
-; RV32I-NEXT: bltu ra, t5, .LBB18_102
-; RV32I-NEXT: # %bb.101:
+; RV32I-NEXT: bltu ra, t5, .LBB18_94
+; RV32I-NEXT: # %bb.93:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: j .LBB18_103
-; RV32I-NEXT: .LBB18_102:
+; RV32I-NEXT: j .LBB18_95
+; RV32I-NEXT: .LBB18_94:
; RV32I-NEXT: srl s7, a7, a5
-; RV32I-NEXT: .LBB18_103:
+; RV32I-NEXT: .LBB18_95:
; RV32I-NEXT: mv t6, s8
; RV32I-NEXT: mv s8, s11
-; RV32I-NEXT: bltu s10, t5, .LBB18_105
-; RV32I-NEXT: # %bb.104:
+; RV32I-NEXT: bltu s10, t5, .LBB18_97
+; RV32I-NEXT: # %bb.96:
; RV32I-NEXT: li s9, 0
; RV32I-NEXT: sll a3, t2, s10
-; RV32I-NEXT: j .LBB18_106
-; RV32I-NEXT: .LBB18_105:
+; RV32I-NEXT: j .LBB18_98
+; RV32I-NEXT: .LBB18_97:
; RV32I-NEXT: sll s9, t2, s9
; RV32I-NEXT: neg a3, s10
; RV32I-NEXT: srl a3, t2, a3
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: .LBB18_106:
+; RV32I-NEXT: .LBB18_98:
; RV32I-NEXT: addi s11, ra, -64
; RV32I-NEXT: mv s3, a4
-; RV32I-NEXT: beqz s10, .LBB18_108
-; RV32I-NEXT: # %bb.107:
+; RV32I-NEXT: beqz s10, .LBB18_100
+; RV32I-NEXT: # %bb.99:
; RV32I-NEXT: mv s3, a3
-; RV32I-NEXT: .LBB18_108:
-; RV32I-NEXT: bltu s11, t5, .LBB18_110
-; RV32I-NEXT: # %bb.109:
+; RV32I-NEXT: .LBB18_100:
+; RV32I-NEXT: bltu s11, t5, .LBB18_102
+; RV32I-NEXT: # %bb.101:
; RV32I-NEXT: sra a3, a4, s11
-; RV32I-NEXT: bnez s11, .LBB18_111
-; RV32I-NEXT: j .LBB18_112
-; RV32I-NEXT: .LBB18_110:
+; RV32I-NEXT: bnez s11, .LBB18_103
+; RV32I-NEXT: j .LBB18_104
+; RV32I-NEXT: .LBB18_102:
; RV32I-NEXT: srl a3, t2, ra
; RV32I-NEXT: mv s10, s4
; RV32I-NEXT: neg s4, s11
; RV32I-NEXT: sll s4, a4, s4
; RV32I-NEXT: or a3, a3, s4
; RV32I-NEXT: mv s4, s10
-; RV32I-NEXT: beqz s11, .LBB18_112
-; RV32I-NEXT: .LBB18_111:
+; RV32I-NEXT: beqz s11, .LBB18_104
+; RV32I-NEXT: .LBB18_103:
; RV32I-NEXT: mv t2, a3
-; RV32I-NEXT: .LBB18_112:
-; RV32I-NEXT: bltu s11, t5, .LBB18_114
-; RV32I-NEXT: # %bb.113:
+; RV32I-NEXT: .LBB18_104:
+; RV32I-NEXT: bltu s11, t5, .LBB18_106
+; RV32I-NEXT: # %bb.105:
; RV32I-NEXT: srai t5, a4, 31
-; RV32I-NEXT: j .LBB18_115
-; RV32I-NEXT: .LBB18_114:
+; RV32I-NEXT: lw s11, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: bltu ra, t0, .LBB18_107
+; RV32I-NEXT: j .LBB18_108
+; RV32I-NEXT: .LBB18_106:
; RV32I-NEXT: sra t5, a4, ra
-; RV32I-NEXT: .LBB18_115:
; RV32I-NEXT: lw s11, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: bgeu ra, t0, .LBB18_117
-; RV32I-NEXT: # %bb.116:
+; RV32I-NEXT: bgeu ra, t0, .LBB18_108
+; RV32I-NEXT: .LBB18_107:
; RV32I-NEXT: or t2, a6, s9
; RV32I-NEXT: or t5, s7, s3
-; RV32I-NEXT: .LBB18_117:
+; RV32I-NEXT: .LBB18_108:
; RV32I-NEXT: li a6, 128
-; RV32I-NEXT: bnez ra, .LBB18_126
-; RV32I-NEXT: # %bb.118:
-; RV32I-NEXT: bgeu ra, t0, .LBB18_127
-; RV32I-NEXT: .LBB18_119:
-; RV32I-NEXT: bgeu a5, a6, .LBB18_121
-; RV32I-NEXT: .LBB18_120:
+; RV32I-NEXT: bnez ra, .LBB18_117
+; RV32I-NEXT: # %bb.109:
+; RV32I-NEXT: bgeu ra, t0, .LBB18_118
+; RV32I-NEXT: .LBB18_110:
+; RV32I-NEXT: bgeu a5, a6, .LBB18_112
+; RV32I-NEXT: .LBB18_111:
; RV32I-NEXT: lw a3, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: or t1, a3, s8
; RV32I-NEXT: lw a3, 4(sp) # 4-byte Folded Reload
@@ -8951,23 +9004,23 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: or s1, a3, s4
; RV32I-NEXT: lw a3, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s2, a3, s6
-; RV32I-NEXT: .LBB18_121:
+; RV32I-NEXT: .LBB18_112:
; RV32I-NEXT: lw ra, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: mv t0, s5
-; RV32I-NEXT: beqz a5, .LBB18_123
-; RV32I-NEXT: # %bb.122:
+; RV32I-NEXT: beqz a5, .LBB18_114
+; RV32I-NEXT: # %bb.113:
; RV32I-NEXT: mv s0, t1
; RV32I-NEXT: mv a0, a7
; RV32I-NEXT: mv t4, s1
; RV32I-NEXT: mv t3, s2
-; RV32I-NEXT: .LBB18_123:
-; RV32I-NEXT: bltu a5, a6, .LBB18_125
-; RV32I-NEXT: # %bb.124:
+; RV32I-NEXT: .LBB18_114:
+; RV32I-NEXT: bltu a5, a6, .LBB18_116
+; RV32I-NEXT: # %bb.115:
; RV32I-NEXT: srai a1, a4, 31
; RV32I-NEXT: mv t0, a1
; RV32I-NEXT: mv s11, a1
; RV32I-NEXT: mv ra, a1
-; RV32I-NEXT: .LBB18_125:
+; RV32I-NEXT: .LBB18_116:
; RV32I-NEXT: srli a4, s0, 16
; RV32I-NEXT: lui t1, 16
; RV32I-NEXT: srli a7, s0, 24
@@ -9049,15 +9102,15 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 96
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB18_126:
+; RV32I-NEXT: .LBB18_117:
; RV32I-NEXT: mv t1, t2
; RV32I-NEXT: mv a7, t5
-; RV32I-NEXT: bltu ra, t0, .LBB18_119
-; RV32I-NEXT: .LBB18_127:
+; RV32I-NEXT: bltu ra, t0, .LBB18_110
+; RV32I-NEXT: .LBB18_118:
; RV32I-NEXT: srai s1, a4, 31
; RV32I-NEXT: mv s2, s1
-; RV32I-NEXT: bltu a5, a6, .LBB18_120
-; RV32I-NEXT: j .LBB18_121
+; RV32I-NEXT: bltu a5, a6, .LBB18_111
+; RV32I-NEXT: j .LBB18_112
%src = load i256, ptr %src.ptr, align 1
%byteOff = load i256, ptr %byteOff.ptr, align 1
%bitOff = shl i256 %byteOff, 3
@@ -9604,268 +9657,276 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: # %bb.38:
; RV32I-NEXT: li s4, 0
; RV32I-NEXT: srl a3, a0, a5
-; RV32I-NEXT: j .LBB19_40
+; RV32I-NEXT: mv a6, s0
+; RV32I-NEXT: bnez a5, .LBB19_40
+; RV32I-NEXT: j .LBB19_41
; RV32I-NEXT: .LBB19_39:
; RV32I-NEXT: srl s4, t3, a5
; RV32I-NEXT: srl a3, s0, a5
; RV32I-NEXT: sll a6, a0, s6
; RV32I-NEXT: or a3, a3, a6
-; RV32I-NEXT: .LBB19_40:
; RV32I-NEXT: mv a6, s0
-; RV32I-NEXT: beqz a5, .LBB19_42
-; RV32I-NEXT: # %bb.41:
+; RV32I-NEXT: beqz a5, .LBB19_41
+; RV32I-NEXT: .LBB19_40:
; RV32I-NEXT: mv a6, a3
-; RV32I-NEXT: .LBB19_42:
-; RV32I-NEXT: bltu a5, t5, .LBB19_45
-; RV32I-NEXT: # %bb.43:
+; RV32I-NEXT: .LBB19_41:
+; RV32I-NEXT: bltu a5, t5, .LBB19_44
+; RV32I-NEXT: # %bb.42:
; RV32I-NEXT: li s1, 0
-; RV32I-NEXT: bgeu s7, t5, .LBB19_46
-; RV32I-NEXT: .LBB19_44:
+; RV32I-NEXT: bgeu s7, t5, .LBB19_45
+; RV32I-NEXT: .LBB19_43:
; RV32I-NEXT: sll s3, t4, s6
; RV32I-NEXT: srl a3, t4, s10
; RV32I-NEXT: or a3, a3, ra
+; RV32I-NEXT: mv s10, t3
+; RV32I-NEXT: bnez s7, .LBB19_46
; RV32I-NEXT: j .LBB19_47
-; RV32I-NEXT: .LBB19_45:
+; RV32I-NEXT: .LBB19_44:
; RV32I-NEXT: srl s1, a0, a5
-; RV32I-NEXT: bltu s7, t5, .LBB19_44
-; RV32I-NEXT: .LBB19_46:
+; RV32I-NEXT: bltu s7, t5, .LBB19_43
+; RV32I-NEXT: .LBB19_45:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t4, s7
-; RV32I-NEXT: .LBB19_47:
; RV32I-NEXT: mv s10, t3
-; RV32I-NEXT: beqz s7, .LBB19_49
-; RV32I-NEXT: # %bb.48:
+; RV32I-NEXT: beqz s7, .LBB19_47
+; RV32I-NEXT: .LBB19_46:
; RV32I-NEXT: mv s10, a3
-; RV32I-NEXT: .LBB19_49:
-; RV32I-NEXT: bltu s9, t5, .LBB19_51
-; RV32I-NEXT: # %bb.50:
+; RV32I-NEXT: .LBB19_47:
+; RV32I-NEXT: bltu s9, t5, .LBB19_49
+; RV32I-NEXT: # %bb.48:
; RV32I-NEXT: srl a3, t3, s9
-; RV32I-NEXT: j .LBB19_52
-; RV32I-NEXT: .LBB19_51:
+; RV32I-NEXT: mv s2, t4
+; RV32I-NEXT: bnez s9, .LBB19_50
+; RV32I-NEXT: j .LBB19_51
+; RV32I-NEXT: .LBB19_49:
; RV32I-NEXT: sll a3, t3, s11
; RV32I-NEXT: or a3, s2, a3
-; RV32I-NEXT: .LBB19_52:
; RV32I-NEXT: mv s2, t4
-; RV32I-NEXT: beqz s9, .LBB19_54
-; RV32I-NEXT: # %bb.53:
+; RV32I-NEXT: beqz s9, .LBB19_51
+; RV32I-NEXT: .LBB19_50:
; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: .LBB19_54:
-; RV32I-NEXT: bltu s9, t5, .LBB19_56
-; RV32I-NEXT: # %bb.55:
+; RV32I-NEXT: .LBB19_51:
+; RV32I-NEXT: bltu s9, t5, .LBB19_53
+; RV32I-NEXT: # %bb.52:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: bltu a5, t6, .LBB19_57
-; RV32I-NEXT: j .LBB19_58
-; RV32I-NEXT: .LBB19_56:
+; RV32I-NEXT: bltu a5, t6, .LBB19_54
+; RV32I-NEXT: j .LBB19_55
+; RV32I-NEXT: .LBB19_53:
; RV32I-NEXT: srl s7, t3, a5
-; RV32I-NEXT: bgeu a5, t6, .LBB19_58
-; RV32I-NEXT: .LBB19_57:
+; RV32I-NEXT: bgeu a5, t6, .LBB19_55
+; RV32I-NEXT: .LBB19_54:
; RV32I-NEXT: or s2, a6, s3
; RV32I-NEXT: or s7, s1, s10
-; RV32I-NEXT: .LBB19_58:
+; RV32I-NEXT: .LBB19_55:
; RV32I-NEXT: li a3, 128
; RV32I-NEXT: mv a6, s0
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: beqz a5, .LBB19_60
-; RV32I-NEXT: # %bb.59:
+; RV32I-NEXT: beqz a5, .LBB19_57
+; RV32I-NEXT: # %bb.56:
; RV32I-NEXT: mv a6, s2
; RV32I-NEXT: mv s1, s7
-; RV32I-NEXT: .LBB19_60:
+; RV32I-NEXT: .LBB19_57:
; RV32I-NEXT: sw a6, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sub s2, a3, a5
-; RV32I-NEXT: bltu a5, t6, .LBB19_62
-; RV32I-NEXT: # %bb.61:
+; RV32I-NEXT: bltu a5, t6, .LBB19_59
+; RV32I-NEXT: # %bb.58:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: .LBB19_62:
+; RV32I-NEXT: .LBB19_59:
; RV32I-NEXT: neg s3, s2
; RV32I-NEXT: srl a6, t1, s3
; RV32I-NEXT: sw s4, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s2, t5, .LBB19_64
-; RV32I-NEXT: # %bb.63:
+; RV32I-NEXT: bltu s2, t5, .LBB19_61
+; RV32I-NEXT: # %bb.60:
; RV32I-NEXT: li s11, 0
; RV32I-NEXT: sll a3, t1, s2
-; RV32I-NEXT: j .LBB19_65
-; RV32I-NEXT: .LBB19_64:
+; RV32I-NEXT: j .LBB19_62
+; RV32I-NEXT: .LBB19_61:
; RV32I-NEXT: sll s11, t1, s6
; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a6, a3
-; RV32I-NEXT: .LBB19_65:
+; RV32I-NEXT: .LBB19_62:
; RV32I-NEXT: sub s1, t6, s2
; RV32I-NEXT: mv s8, a7
-; RV32I-NEXT: beqz s2, .LBB19_67
-; RV32I-NEXT: # %bb.66:
+; RV32I-NEXT: beqz s2, .LBB19_64
+; RV32I-NEXT: # %bb.63:
; RV32I-NEXT: mv s8, a3
-; RV32I-NEXT: .LBB19_67:
-; RV32I-NEXT: bltu s1, t5, .LBB19_69
-; RV32I-NEXT: # %bb.68:
+; RV32I-NEXT: .LBB19_64:
+; RV32I-NEXT: bltu s1, t5, .LBB19_66
+; RV32I-NEXT: # %bb.65:
; RV32I-NEXT: srl a3, a7, s1
-; RV32I-NEXT: j .LBB19_70
-; RV32I-NEXT: .LBB19_69:
+; RV32I-NEXT: mv a6, t1
+; RV32I-NEXT: bnez s1, .LBB19_67
+; RV32I-NEXT: j .LBB19_68
+; RV32I-NEXT: .LBB19_66:
; RV32I-NEXT: neg a3, s1
; RV32I-NEXT: sll a3, a7, a3
; RV32I-NEXT: or a3, a6, a3
-; RV32I-NEXT: .LBB19_70:
; RV32I-NEXT: mv a6, t1
-; RV32I-NEXT: beqz s1, .LBB19_72
-; RV32I-NEXT: # %bb.71:
+; RV32I-NEXT: beqz s1, .LBB19_68
+; RV32I-NEXT: .LBB19_67:
; RV32I-NEXT: mv a6, a3
-; RV32I-NEXT: .LBB19_72:
-; RV32I-NEXT: bltu s1, t5, .LBB19_76
-; RV32I-NEXT: # %bb.73:
+; RV32I-NEXT: .LBB19_68:
+; RV32I-NEXT: bltu s1, t5, .LBB19_71
+; RV32I-NEXT: # %bb.69:
; RV32I-NEXT: li s1, 0
-; RV32I-NEXT: .LBB19_74:
; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bgeu s2, t5, .LBB19_77
-; RV32I-NEXT: # %bb.75:
+; RV32I-NEXT: bgeu s2, t5, .LBB19_72
+; RV32I-NEXT: .LBB19_70:
; RV32I-NEXT: sll s6, t2, s6
; RV32I-NEXT: srl a3, t2, s3
; RV32I-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: j .LBB19_78
-; RV32I-NEXT: .LBB19_76:
+; RV32I-NEXT: j .LBB19_73
+; RV32I-NEXT: .LBB19_71:
; RV32I-NEXT: srl s1, a7, s3
-; RV32I-NEXT: j .LBB19_74
-; RV32I-NEXT: .LBB19_77:
+; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: bltu s2, t5, .LBB19_70
+; RV32I-NEXT: .LBB19_72:
; RV32I-NEXT: li s6, 0
; RV32I-NEXT: sll a3, t2, s2
-; RV32I-NEXT: .LBB19_78:
+; RV32I-NEXT: .LBB19_73:
; RV32I-NEXT: addi s9, s2, -64
; RV32I-NEXT: mv s5, a4
-; RV32I-NEXT: beqz s2, .LBB19_80
-; RV32I-NEXT: # %bb.79:
+; RV32I-NEXT: beqz s2, .LBB19_75
+; RV32I-NEXT: # %bb.74:
; RV32I-NEXT: mv s5, a3
-; RV32I-NEXT: .LBB19_80:
-; RV32I-NEXT: bltu s9, t5, .LBB19_82
-; RV32I-NEXT: # %bb.81:
+; RV32I-NEXT: .LBB19_75:
+; RV32I-NEXT: bltu s9, t5, .LBB19_77
+; RV32I-NEXT: # %bb.76:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t1, s9
-; RV32I-NEXT: j .LBB19_83
-; RV32I-NEXT: .LBB19_82:
+; RV32I-NEXT: mv s7, a7
+; RV32I-NEXT: bnez s9, .LBB19_78
+; RV32I-NEXT: j .LBB19_79
+; RV32I-NEXT: .LBB19_77:
; RV32I-NEXT: sll s3, t1, s2
; RV32I-NEXT: neg a3, s9
; RV32I-NEXT: srl a3, t1, a3
; RV32I-NEXT: sll s4, a7, s2
; RV32I-NEXT: or a3, a3, s4
-; RV32I-NEXT: .LBB19_83:
; RV32I-NEXT: mv s7, a7
-; RV32I-NEXT: beqz s9, .LBB19_85
-; RV32I-NEXT: # %bb.84:
+; RV32I-NEXT: beqz s9, .LBB19_79
+; RV32I-NEXT: .LBB19_78:
; RV32I-NEXT: mv s7, a3
-; RV32I-NEXT: .LBB19_85:
-; RV32I-NEXT: bltu s2, t6, .LBB19_87
-; RV32I-NEXT: # %bb.86:
+; RV32I-NEXT: .LBB19_79:
+; RV32I-NEXT: bltu s2, t6, .LBB19_81
+; RV32I-NEXT: # %bb.80:
; RV32I-NEXT: li s11, 0
; RV32I-NEXT: li s8, 0
-; RV32I-NEXT: j .LBB19_88
-; RV32I-NEXT: .LBB19_87:
+; RV32I-NEXT: j .LBB19_82
+; RV32I-NEXT: .LBB19_81:
; RV32I-NEXT: or s3, a6, s6
; RV32I-NEXT: or s7, s1, s5
-; RV32I-NEXT: .LBB19_88:
+; RV32I-NEXT: .LBB19_82:
; RV32I-NEXT: addi ra, a5, -128
; RV32I-NEXT: mv s4, t2
; RV32I-NEXT: mv s6, a4
-; RV32I-NEXT: beqz s2, .LBB19_90
-; RV32I-NEXT: # %bb.89:
+; RV32I-NEXT: beqz s2, .LBB19_84
+; RV32I-NEXT: # %bb.83:
; RV32I-NEXT: mv s4, s3
; RV32I-NEXT: mv s6, s7
-; RV32I-NEXT: .LBB19_90:
+; RV32I-NEXT: .LBB19_84:
; RV32I-NEXT: neg s9, ra
; RV32I-NEXT: sll s3, a4, s9
-; RV32I-NEXT: bltu ra, t5, .LBB19_92
-; RV32I-NEXT: # %bb.91:
+; RV32I-NEXT: bltu ra, t5, .LBB19_86
+; RV32I-NEXT: # %bb.85:
; RV32I-NEXT: sra a3, a4, ra
-; RV32I-NEXT: j .LBB19_93
-; RV32I-NEXT: .LBB19_92:
+; RV32I-NEXT: mv s1, t2
+; RV32I-NEXT: bnez ra, .LBB19_87
+; RV32I-NEXT: j .LBB19_88
+; RV32I-NEXT: .LBB19_86:
; RV32I-NEXT: lw a3, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: .LBB19_93:
; RV32I-NEXT: mv s1, t2
-; RV32I-NEXT: beqz ra, .LBB19_95
-; RV32I-NEXT: # %bb.94:
+; RV32I-NEXT: beqz ra, .LBB19_88
+; RV32I-NEXT: .LBB19_87:
; RV32I-NEXT: mv s1, a3
-; RV32I-NEXT: .LBB19_95:
-; RV32I-NEXT: bltu ra, t5, .LBB19_97
-; RV32I-NEXT: # %bb.96:
+; RV32I-NEXT: .LBB19_88:
+; RV32I-NEXT: bltu ra, t5, .LBB19_90
+; RV32I-NEXT: # %bb.89:
; RV32I-NEXT: srai s2, a4, 31
; RV32I-NEXT: srl a3, a7, ra
-; RV32I-NEXT: j .LBB19_98
-; RV32I-NEXT: .LBB19_97:
+; RV32I-NEXT: mv a6, t1
+; RV32I-NEXT: bnez ra, .LBB19_91
+; RV32I-NEXT: j .LBB19_92
+; RV32I-NEXT: .LBB19_90:
; RV32I-NEXT: sra s2, a4, a5
; RV32I-NEXT: sll a3, a7, s9
; RV32I-NEXT: lw a6, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a6, a3
-; RV32I-NEXT: .LBB19_98:
; RV32I-NEXT: mv a6, t1
-; RV32I-NEXT: beqz ra, .LBB19_100
-; RV32I-NEXT: # %bb.99:
+; RV32I-NEXT: beqz ra, .LBB19_92
+; RV32I-NEXT: .LBB19_91:
; RV32I-NEXT: mv a6, a3
-; RV32I-NEXT: .LBB19_100:
+; RV32I-NEXT: .LBB19_92:
; RV32I-NEXT: mv s5, t0
; RV32I-NEXT: sub s10, t6, ra
; RV32I-NEXT: li t0, 64
-; RV32I-NEXT: bltu ra, t5, .LBB19_102
-; RV32I-NEXT: # %bb.101:
+; RV32I-NEXT: bltu ra, t5, .LBB19_94
+; RV32I-NEXT: # %bb.93:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: j .LBB19_103
-; RV32I-NEXT: .LBB19_102:
+; RV32I-NEXT: j .LBB19_95
+; RV32I-NEXT: .LBB19_94:
; RV32I-NEXT: srl s7, a7, a5
-; RV32I-NEXT: .LBB19_103:
+; RV32I-NEXT: .LBB19_95:
; RV32I-NEXT: mv t6, s8
; RV32I-NEXT: mv s8, s11
-; RV32I-NEXT: bltu s10, t5, .LBB19_105
-; RV32I-NEXT: # %bb.104:
+; RV32I-NEXT: bltu s10, t5, .LBB19_97
+; RV32I-NEXT: # %bb.96:
; RV32I-NEXT: li s9, 0
; RV32I-NEXT: sll a3, t2, s10
-; RV32I-NEXT: j .LBB19_106
-; RV32I-NEXT: .LBB19_105:
+; RV32I-NEXT: j .LBB19_98
+; RV32I-NEXT: .LBB19_97:
; RV32I-NEXT: sll s9, t2, s9
; RV32I-NEXT: neg a3, s10
; RV32I-NEXT: srl a3, t2, a3
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: .LBB19_106:
+; RV32I-NEXT: .LBB19_98:
; RV32I-NEXT: addi s11, ra, -64
; RV32I-NEXT: mv s3, a4
-; RV32I-NEXT: beqz s10, .LBB19_108
-; RV32I-NEXT: # %bb.107:
+; RV32I-NEXT: beqz s10, .LBB19_100
+; RV32I-NEXT: # %bb.99:
; RV32I-NEXT: mv s3, a3
-; RV32I-NEXT: .LBB19_108:
-; RV32I-NEXT: bltu s11, t5, .LBB19_110
-; RV32I-NEXT: # %bb.109:
+; RV32I-NEXT: .LBB19_100:
+; RV32I-NEXT: bltu s11, t5, .LBB19_102
+; RV32I-NEXT: # %bb.101:
; RV32I-NEXT: sra a3, a4, s11
-; RV32I-NEXT: bnez s11, .LBB19_111
-; RV32I-NEXT: j .LBB19_112
-; RV32I-NEXT: .LBB19_110:
+; RV32I-NEXT: bnez s11, .LBB19_103
+; RV32I-NEXT: j .LBB19_104
+; RV32I-NEXT: .LBB19_102:
; RV32I-NEXT: srl a3, t2, ra
; RV32I-NEXT: mv s10, s4
; RV32I-NEXT: neg s4, s11
; RV32I-NEXT: sll s4, a4, s4
; RV32I-NEXT: or a3, a3, s4
; RV32I-NEXT: mv s4, s10
-; RV32I-NEXT: beqz s11, .LBB19_112
-; RV32I-NEXT: .LBB19_111:
+; RV32I-NEXT: beqz s11, .LBB19_104
+; RV32I-NEXT: .LBB19_103:
; RV32I-NEXT: mv t2, a3
-; RV32I-NEXT: .LBB19_112:
-; RV32I-NEXT: bltu s11, t5, .LBB19_114
-; RV32I-NEXT: # %bb.113:
+; RV32I-NEXT: .LBB19_104:
+; RV32I-NEXT: bltu s11, t5, .LBB19_106
+; RV32I-NEXT: # %bb.105:
; RV32I-NEXT: srai t5, a4, 31
-; RV32I-NEXT: j .LBB19_115
-; RV32I-NEXT: .LBB19_114:
+; RV32I-NEXT: lw s11, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: bltu ra, t0, .LBB19_107
+; RV32I-NEXT: j .LBB19_108
+; RV32I-NEXT: .LBB19_106:
; RV32I-NEXT: sra t5, a4, ra
-; RV32I-NEXT: .LBB19_115:
; RV32I-NEXT: lw s11, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: bgeu ra, t0, .LBB19_117
-; RV32I-NEXT: # %bb.116:
+; RV32I-NEXT: bgeu ra, t0, .LBB19_108
+; RV32I-NEXT: .LBB19_107:
; RV32I-NEXT: or t2, a6, s9
; RV32I-NEXT: or t5, s7, s3
-; RV32I-NEXT: .LBB19_117:
+; RV32I-NEXT: .LBB19_108:
; RV32I-NEXT: li a6, 128
-; RV32I-NEXT: bnez ra, .LBB19_126
-; RV32I-NEXT: # %bb.118:
-; RV32I-NEXT: bgeu ra, t0, .LBB19_127
-; RV32I-NEXT: .LBB19_119:
-; RV32I-NEXT: bgeu a5, a6, .LBB19_121
-; RV32I-NEXT: .LBB19_120:
+; RV32I-NEXT: bnez ra, .LBB19_117
+; RV32I-NEXT: # %bb.109:
+; RV32I-NEXT: bgeu ra, t0, .LBB19_118
+; RV32I-NEXT: .LBB19_110:
+; RV32I-NEXT: bgeu a5, a6, .LBB19_112
+; RV32I-NEXT: .LBB19_111:
; RV32I-NEXT: lw a3, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: or t1, a3, s8
; RV32I-NEXT: lw a3, 4(sp) # 4-byte Folded Reload
@@ -9874,23 +9935,23 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: or s1, a3, s4
; RV32I-NEXT: lw a3, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s2, a3, s6
-; RV32I-NEXT: .LBB19_121:
+; RV32I-NEXT: .LBB19_112:
; RV32I-NEXT: lw ra, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: mv t0, s5
-; RV32I-NEXT: beqz a5, .LBB19_123
-; RV32I-NEXT: # %bb.122:
+; RV32I-NEXT: beqz a5, .LBB19_114
+; RV32I-NEXT: # %bb.113:
; RV32I-NEXT: mv s0, t1
; RV32I-NEXT: mv a0, a7
; RV32I-NEXT: mv t4, s1
; RV32I-NEXT: mv t3, s2
-; RV32I-NEXT: .LBB19_123:
-; RV32I-NEXT: bltu a5, a6, .LBB19_125
-; RV32I-NEXT: # %bb.124:
+; RV32I-NEXT: .LBB19_114:
+; RV32I-NEXT: bltu a5, a6, .LBB19_116
+; RV32I-NEXT: # %bb.115:
; RV32I-NEXT: srai a1, a4, 31
; RV32I-NEXT: mv t0, a1
; RV32I-NEXT: mv s11, a1
; RV32I-NEXT: mv ra, a1
-; RV32I-NEXT: .LBB19_125:
+; RV32I-NEXT: .LBB19_116:
; RV32I-NEXT: srli a4, s0, 16
; RV32I-NEXT: lui t1, 16
; RV32I-NEXT: srli a7, s0, 24
@@ -9972,15 +10033,15 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 96
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB19_126:
+; RV32I-NEXT: .LBB19_117:
; RV32I-NEXT: mv t1, t2
; RV32I-NEXT: mv a7, t5
-; RV32I-NEXT: bltu ra, t0, .LBB19_119
-; RV32I-NEXT: .LBB19_127:
+; RV32I-NEXT: bltu ra, t0, .LBB19_110
+; RV32I-NEXT: .LBB19_118:
; RV32I-NEXT: srai s1, a4, 31
; RV32I-NEXT: mv s2, s1
-; RV32I-NEXT: bltu a5, a6, .LBB19_120
-; RV32I-NEXT: j .LBB19_121
+; RV32I-NEXT: bltu a5, a6, .LBB19_111
+; RV32I-NEXT: j .LBB19_112
%src = load i256, ptr %src.ptr, align 1
%wordOff = load i256, ptr %wordOff.ptr, align 1
%bitOff = shl i256 %wordOff, 5
@@ -10527,268 +10588,276 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV32I-NEXT: # %bb.38:
; RV32I-NEXT: li s4, 0
; RV32I-NEXT: srl a3, a0, a5
-; RV32I-NEXT: j .LBB20_40
+; RV32I-NEXT: mv a6, s0
+; RV32I-NEXT: bnez a5, .LBB20_40
+; RV32I-NEXT: j .LBB20_41
; RV32I-NEXT: .LBB20_39:
; RV32I-NEXT: srl s4, t3, a5
; RV32I-NEXT: srl a3, s0, a5
; RV32I-NEXT: sll a6, a0, s6
; RV32I-NEXT: or a3, a3, a6
-; RV32I-NEXT: .LBB20_40:
; RV32I-NEXT: mv a6, s0
-; RV32I-NEXT: beqz a5, .LBB20_42
-; RV32I-NEXT: # %bb.41:
+; RV32I-NEXT: beqz a5, .LBB20_41
+; RV32I-NEXT: .LBB20_40:
; RV32I-NEXT: mv a6, a3
-; RV32I-NEXT: .LBB20_42:
-; RV32I-NEXT: bltu a5, t5, .LBB20_45
-; RV32I-NEXT: # %bb.43:
+; RV32I-NEXT: .LBB20_41:
+; RV32I-NEXT: bltu a5, t5, .LBB20_44
+; RV32I-NEXT: # %bb.42:
; RV32I-NEXT: li s1, 0
-; RV32I-NEXT: bgeu s7, t5, .LBB20_46
-; RV32I-NEXT: .LBB20_44:
+; RV32I-NEXT: bgeu s7, t5, .LBB20_45
+; RV32I-NEXT: .LBB20_43:
; RV32I-NEXT: sll s3, t4, s6
; RV32I-NEXT: srl a3, t4, s10
; RV32I-NEXT: or a3, a3, ra
+; RV32I-NEXT: mv s10, t3
+; RV32I-NEXT: bnez s7, .LBB20_46
; RV32I-NEXT: j .LBB20_47
-; RV32I-NEXT: .LBB20_45:
+; RV32I-NEXT: .LBB20_44:
; RV32I-NEXT: srl s1, a0, a5
-; RV32I-NEXT: bltu s7, t5, .LBB20_44
-; RV32I-NEXT: .LBB20_46:
+; RV32I-NEXT: bltu s7, t5, .LBB20_43
+; RV32I-NEXT: .LBB20_45:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t4, s7
-; RV32I-NEXT: .LBB20_47:
; RV32I-NEXT: mv s10, t3
-; RV32I-NEXT: beqz s7, .LBB20_49
-; RV32I-NEXT: # %bb.48:
+; RV32I-NEXT: beqz s7, .LBB20_47
+; RV32I-NEXT: .LBB20_46:
; RV32I-NEXT: mv s10, a3
-; RV32I-NEXT: .LBB20_49:
-; RV32I-NEXT: bltu s9, t5, .LBB20_51
-; RV32I-NEXT: # %bb.50:
+; RV32I-NEXT: .LBB20_47:
+; RV32I-NEXT: bltu s9, t5, .LBB20_49
+; RV32I-NEXT: # %bb.48:
; RV32I-NEXT: srl a3, t3, s9
-; RV32I-NEXT: j .LBB20_52
-; RV32I-NEXT: .LBB20_51:
+; RV32I-NEXT: mv s2, t4
+; RV32I-NEXT: bnez s9, .LBB20_50
+; RV32I-NEXT: j .LBB20_51
+; RV32I-NEXT: .LBB20_49:
; RV32I-NEXT: sll a3, t3, s11
; RV32I-NEXT: or a3, s2, a3
-; RV32I-NEXT: .LBB20_52:
; RV32I-NEXT: mv s2, t4
-; RV32I-NEXT: beqz s9, .LBB20_54
-; RV32I-NEXT: # %bb.53:
+; RV32I-NEXT: beqz s9, .LBB20_51
+; RV32I-NEXT: .LBB20_50:
; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: .LBB20_54:
-; RV32I-NEXT: bltu s9, t5, .LBB20_56
-; RV32I-NEXT: # %bb.55:
+; RV32I-NEXT: .LBB20_51:
+; RV32I-NEXT: bltu s9, t5, .LBB20_53
+; RV32I-NEXT: # %bb.52:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: bltu a5, t6, .LBB20_57
-; RV32I-NEXT: j .LBB20_58
-; RV32I-NEXT: .LBB20_56:
+; RV32I-NEXT: bltu a5, t6, .LBB20_54
+; RV32I-NEXT: j .LBB20_55
+; RV32I-NEXT: .LBB20_53:
; RV32I-NEXT: srl s7, t3, a5
-; RV32I-NEXT: bgeu a5, t6, .LBB20_58
-; RV32I-NEXT: .LBB20_57:
+; RV32I-NEXT: bgeu a5, t6, .LBB20_55
+; RV32I-NEXT: .LBB20_54:
; RV32I-NEXT: or s2, a6, s3
; RV32I-NEXT: or s7, s1, s10
-; RV32I-NEXT: .LBB20_58:
+; RV32I-NEXT: .LBB20_55:
; RV32I-NEXT: li a3, 128
; RV32I-NEXT: mv a6, s0
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: beqz a5, .LBB20_60
-; RV32I-NEXT: # %bb.59:
+; RV32I-NEXT: beqz a5, .LBB20_57
+; RV32I-NEXT: # %bb.56:
; RV32I-NEXT: mv a6, s2
; RV32I-NEXT: mv s1, s7
-; RV32I-NEXT: .LBB20_60:
+; RV32I-NEXT: .LBB20_57:
; RV32I-NEXT: sw a6, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sub s2, a3, a5
-; RV32I-NEXT: bltu a5, t6, .LBB20_62
-; RV32I-NEXT: # %bb.61:
+; RV32I-NEXT: bltu a5, t6, .LBB20_59
+; RV32I-NEXT: # %bb.58:
; RV32I-NEXT: li s5, 0
; RV32I-NEXT: li s4, 0
-; RV32I-NEXT: .LBB20_62:
+; RV32I-NEXT: .LBB20_59:
; RV32I-NEXT: neg s3, s2
; RV32I-NEXT: srl a6, t1, s3
; RV32I-NEXT: sw s4, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bltu s2, t5, .LBB20_64
-; RV32I-NEXT: # %bb.63:
+; RV32I-NEXT: bltu s2, t5, .LBB20_61
+; RV32I-NEXT: # %bb.60:
; RV32I-NEXT: li s11, 0
; RV32I-NEXT: sll a3, t1, s2
-; RV32I-NEXT: j .LBB20_65
-; RV32I-NEXT: .LBB20_64:
+; RV32I-NEXT: j .LBB20_62
+; RV32I-NEXT: .LBB20_61:
; RV32I-NEXT: sll s11, t1, s6
; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a6, a3
-; RV32I-NEXT: .LBB20_65:
+; RV32I-NEXT: .LBB20_62:
; RV32I-NEXT: sub s1, t6, s2
; RV32I-NEXT: mv s8, a7
-; RV32I-NEXT: beqz s2, .LBB20_67
-; RV32I-NEXT: # %bb.66:
+; RV32I-NEXT: beqz s2, .LBB20_64
+; RV32I-NEXT: # %bb.63:
; RV32I-NEXT: mv s8, a3
-; RV32I-NEXT: .LBB20_67:
-; RV32I-NEXT: bltu s1, t5, .LBB20_69
-; RV32I-NEXT: # %bb.68:
+; RV32I-NEXT: .LBB20_64:
+; RV32I-NEXT: bltu s1, t5, .LBB20_66
+; RV32I-NEXT: # %bb.65:
; RV32I-NEXT: srl a3, a7, s1
-; RV32I-NEXT: j .LBB20_70
-; RV32I-NEXT: .LBB20_69:
+; RV32I-NEXT: mv a6, t1
+; RV32I-NEXT: bnez s1, .LBB20_67
+; RV32I-NEXT: j .LBB20_68
+; RV32I-NEXT: .LBB20_66:
; RV32I-NEXT: neg a3, s1
; RV32I-NEXT: sll a3, a7, a3
; RV32I-NEXT: or a3, a6, a3
-; RV32I-NEXT: .LBB20_70:
; RV32I-NEXT: mv a6, t1
-; RV32I-NEXT: beqz s1, .LBB20_72
-; RV32I-NEXT: # %bb.71:
+; RV32I-NEXT: beqz s1, .LBB20_68
+; RV32I-NEXT: .LBB20_67:
; RV32I-NEXT: mv a6, a3
-; RV32I-NEXT: .LBB20_72:
-; RV32I-NEXT: bltu s1, t5, .LBB20_76
-; RV32I-NEXT: # %bb.73:
+; RV32I-NEXT: .LBB20_68:
+; RV32I-NEXT: bltu s1, t5, .LBB20_71
+; RV32I-NEXT: # %bb.69:
; RV32I-NEXT: li s1, 0
-; RV32I-NEXT: .LBB20_74:
; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bgeu s2, t5, .LBB20_77
-; RV32I-NEXT: # %bb.75:
+; RV32I-NEXT: bgeu s2, t5, .LBB20_72
+; RV32I-NEXT: .LBB20_70:
; RV32I-NEXT: sll s6, t2, s6
; RV32I-NEXT: srl a3, t2, s3
; RV32I-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: j .LBB20_78
-; RV32I-NEXT: .LBB20_76:
+; RV32I-NEXT: j .LBB20_73
+; RV32I-NEXT: .LBB20_71:
; RV32I-NEXT: srl s1, a7, s3
-; RV32I-NEXT: j .LBB20_74
-; RV32I-NEXT: .LBB20_77:
+; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: bltu s2, t5, .LBB20_70
+; RV32I-NEXT: .LBB20_72:
; RV32I-NEXT: li s6, 0
; RV32I-NEXT: sll a3, t2, s2
-; RV32I-NEXT: .LBB20_78:
+; RV32I-NEXT: .LBB20_73:
; RV32I-NEXT: addi s9, s2, -64
; RV32I-NEXT: mv s5, a4
-; RV32I-NEXT: beqz s2, .LBB20_80
-; RV32I-NEXT: # %bb.79:
+; RV32I-NEXT: beqz s2, .LBB20_75
+; RV32I-NEXT: # %bb.74:
; RV32I-NEXT: mv s5, a3
-; RV32I-NEXT: .LBB20_80:
-; RV32I-NEXT: bltu s9, t5, .LBB20_82
-; RV32I-NEXT: # %bb.81:
+; RV32I-NEXT: .LBB20_75:
+; RV32I-NEXT: bltu s9, t5, .LBB20_77
+; RV32I-NEXT: # %bb.76:
; RV32I-NEXT: li s3, 0
; RV32I-NEXT: sll a3, t1, s9
-; RV32I-NEXT: j .LBB20_83
-; RV32I-NEXT: .LBB20_82:
+; RV32I-NEXT: mv s7, a7
+; RV32I-NEXT: bnez s9, .LBB20_78
+; RV32I-NEXT: j .LBB20_79
+; RV32I-NEXT: .LBB20_77:
; RV32I-NEXT: sll s3, t1, s2
; RV32I-NEXT: neg a3, s9
; RV32I-NEXT: srl a3, t1, a3
; RV32I-NEXT: sll s4, a7, s2
; RV32I-NEXT: or a3, a3, s4
-; RV32I-NEXT: .LBB20_83:
; RV32I-NEXT: mv s7, a7
-; RV32I-NEXT: beqz s9, .LBB20_85
-; RV32I-NEXT: # %bb.84:
+; RV32I-NEXT: beqz s9, .LBB20_79
+; RV32I-NEXT: .LBB20_78:
; RV32I-NEXT: mv s7, a3
-; RV32I-NEXT: .LBB20_85:
-; RV32I-NEXT: bltu s2, t6, .LBB20_87
-; RV32I-NEXT: # %bb.86:
+; RV32I-NEXT: .LBB20_79:
+; RV32I-NEXT: bltu s2, t6, .LBB20_81
+; RV32I-NEXT: # %bb.80:
; RV32I-NEXT: li s11, 0
; RV32I-NEXT: li s8, 0
-; RV32I-NEXT: j .LBB20_88
-; RV32I-NEXT: .LBB20_87:
+; RV32I-NEXT: j .LBB20_82
+; RV32I-NEXT: .LBB20_81:
; RV32I-NEXT: or s3, a6, s6
; RV32I-NEXT: or s7, s1, s5
-; RV32I-NEXT: .LBB20_88:
+; RV32I-NEXT: .LBB20_82:
; RV32I-NEXT: addi ra, a5, -128
; RV32I-NEXT: mv s4, t2
; RV32I-NEXT: mv s6, a4
-; RV32I-NEXT: beqz s2, .LBB20_90
-; RV32I-NEXT: # %bb.89:
+; RV32I-NEXT: beqz s2, .LBB20_84
+; RV32I-NEXT: # %bb.83:
; RV32I-NEXT: mv s4, s3
; RV32I-NEXT: mv s6, s7
-; RV32I-NEXT: .LBB20_90:
+; RV32I-NEXT: .LBB20_84:
; RV32I-NEXT: neg s9, ra
; RV32I-NEXT: sll s3, a4, s9
-; RV32I-NEXT: bltu ra, t5, .LBB20_92
-; RV32I-NEXT: # %bb.91:
+; RV32I-NEXT: bltu ra, t5, .LBB20_86
+; RV32I-NEXT: # %bb.85:
; RV32I-NEXT: sra a3, a4, ra
-; RV32I-NEXT: j .LBB20_93
-; RV32I-NEXT: .LBB20_92:
+; RV32I-NEXT: mv s1, t2
+; RV32I-NEXT: bnez ra, .LBB20_87
+; RV32I-NEXT: j .LBB20_88
+; RV32I-NEXT: .LBB20_86:
; RV32I-NEXT: lw a3, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: .LBB20_93:
; RV32I-NEXT: mv s1, t2
-; RV32I-NEXT: beqz ra, .LBB20_95
-; RV32I-NEXT: # %bb.94:
+; RV32I-NEXT: beqz ra, .LBB20_88
+; RV32I-NEXT: .LBB20_87:
; RV32I-NEXT: mv s1, a3
-; RV32I-NEXT: .LBB20_95:
-; RV32I-NEXT: bltu ra, t5, .LBB20_97
-; RV32I-NEXT: # %bb.96:
+; RV32I-NEXT: .LBB20_88:
+; RV32I-NEXT: bltu ra, t5, .LBB20_90
+; RV32I-NEXT: # %bb.89:
; RV32I-NEXT: srai s2, a4, 31
; RV32I-NEXT: srl a3, a7, ra
-; RV32I-NEXT: j .LBB20_98
-; RV32I-NEXT: .LBB20_97:
+; RV32I-NEXT: mv a6, t1
+; RV32I-NEXT: bnez ra, .LBB20_91
+; RV32I-NEXT: j .LBB20_92
+; RV32I-NEXT: .LBB20_90:
; RV32I-NEXT: sra s2, a4, a5
; RV32I-NEXT: sll a3, a7, s9
; RV32I-NEXT: lw a6, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: or a3, a6, a3
-; RV32I-NEXT: .LBB20_98:
; RV32I-NEXT: mv a6, t1
-; RV32I-NEXT: beqz ra, .LBB20_100
-; RV32I-NEXT: # %bb.99:
+; RV32I-NEXT: beqz ra, .LBB20_92
+; RV32I-NEXT: .LBB20_91:
; RV32I-NEXT: mv a6, a3
-; RV32I-NEXT: .LBB20_100:
+; RV32I-NEXT: .LBB20_92:
; RV32I-NEXT: mv s5, t0
; RV32I-NEXT: sub s10, t6, ra
; RV32I-NEXT: li t0, 64
-; RV32I-NEXT: bltu ra, t5, .LBB20_102
-; RV32I-NEXT: # %bb.101:
+; RV32I-NEXT: bltu ra, t5, .LBB20_94
+; RV32I-NEXT: # %bb.93:
; RV32I-NEXT: li s7, 0
-; RV32I-NEXT: j .LBB20_103
-; RV32I-NEXT: .LBB20_102:
+; RV32I-NEXT: j .LBB20_95
+; RV32I-NEXT: .LBB20_94:
; RV32I-NEXT: srl s7, a7, a5
-; RV32I-NEXT: .LBB20_103:
+; RV32I-NEXT: .LBB20_95:
; RV32I-NEXT: mv t6, s8
; RV32I-NEXT: mv s8, s11
-; RV32I-NEXT: bltu s10, t5, .LBB20_105
-; RV32I-NEXT: # %bb.104:
+; RV32I-NEXT: bltu s10, t5, .LBB20_97
+; RV32I-NEXT: # %bb.96:
; RV32I-NEXT: li s9, 0
; RV32I-NEXT: sll a3, t2, s10
-; RV32I-NEXT: j .LBB20_106
-; RV32I-NEXT: .LBB20_105:
+; RV32I-NEXT: j .LBB20_98
+; RV32I-NEXT: .LBB20_97:
; RV32I-NEXT: sll s9, t2, s9
; RV32I-NEXT: neg a3, s10
; RV32I-NEXT: srl a3, t2, a3
; RV32I-NEXT: or a3, a3, s3
-; RV32I-NEXT: .LBB20_106:
+; RV32I-NEXT: .LBB20_98:
; RV32I-NEXT: addi s11, ra, -64
; RV32I-NEXT: mv s3, a4
-; RV32I-NEXT: beqz s10, .LBB20_108
-; RV32I-NEXT: # %bb.107:
+; RV32I-NEXT: beqz s10, .LBB20_100
+; RV32I-NEXT: # %bb.99:
; RV32I-NEXT: mv s3, a3
-; RV32I-NEXT: .LBB20_108:
-; RV32I-NEXT: bltu s11, t5, .LBB20_110
-; RV32I-NEXT: # %bb.109:
+; RV32I-NEXT: .LBB20_100:
+; RV32I-NEXT: bltu s11, t5, .LBB20_102
+; RV32I-NEXT: # %bb.101:
; RV32I-NEXT: sra a3, a4, s11
-; RV32I-NEXT: bnez s11, .LBB20_111
-; RV32I-NEXT: j .LBB20_112
-; RV32I-NEXT: .LBB20_110:
+; RV32I-NEXT: bnez s11, .LBB20_103
+; RV32I-NEXT: j .LBB20_104
+; RV32I-NEXT: .LBB20_102:
; RV32I-NEXT: srl a3, t2, ra
; RV32I-NEXT: mv s10, s4
; RV32I-NEXT: neg s4, s11
; RV32I-NEXT: sll s4, a4, s4
; RV32I-NEXT: or a3, a3, s4
; RV32I-NEXT: mv s4, s10
-; RV32I-NEXT: beqz s11, .LBB20_112
-; RV32I-NEXT: .LBB20_111:
+; RV32I-NEXT: beqz s11, .LBB20_104
+; RV32I-NEXT: .LBB20_103:
; RV32I-NEXT: mv t2, a3
-; RV32I-NEXT: .LBB20_112:
-; RV32I-NEXT: bltu s11, t5, .LBB20_114
-; RV32I-NEXT: # %bb.113:
+; RV32I-NEXT: .LBB20_104:
+; RV32I-NEXT: bltu s11, t5, .LBB20_106
+; RV32I-NEXT: # %bb.105:
; RV32I-NEXT: srai t5, a4, 31
-; RV32I-NEXT: j .LBB20_115
-; RV32I-NEXT: .LBB20_114:
+; RV32I-NEXT: lw s11, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: bltu ra, t0, .LBB20_107
+; RV32I-NEXT: j .LBB20_108
+; RV32I-NEXT: .LBB20_106:
; RV32I-NEXT: sra t5, a4, ra
-; RV32I-NEXT: .LBB20_115:
; RV32I-NEXT: lw s11, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: bgeu ra, t0, .LBB20_117
-; RV32I-NEXT: # %bb.116:
+; RV32I-NEXT: bgeu ra, t0, .LBB20_108
+; RV32I-NEXT: .LBB20_107:
; RV32I-NEXT: or t2, a6, s9
; RV32I-NEXT: or t5, s7, s3
-; RV32I-NEXT: .LBB20_117:
+; RV32I-NEXT: .LBB20_108:
; RV32I-NEXT: li a6, 128
-; RV32I-NEXT: bnez ra, .LBB20_126
-; RV32I-NEXT: # %bb.118:
-; RV32I-NEXT: bgeu ra, t0, .LBB20_127
-; RV32I-NEXT: .LBB20_119:
-; RV32I-NEXT: bgeu a5, a6, .LBB20_121
-; RV32I-NEXT: .LBB20_120:
+; RV32I-NEXT: bnez ra, .LBB20_117
+; RV32I-NEXT: # %bb.109:
+; RV32I-NEXT: bgeu ra, t0, .LBB20_118
+; RV32I-NEXT: .LBB20_110:
+; RV32I-NEXT: bgeu a5, a6, .LBB20_112
+; RV32I-NEXT: .LBB20_111:
; RV32I-NEXT: lw a3, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: or t1, a3, s8
; RV32I-NEXT: lw a3, 4(sp) # 4-byte Folded Reload
@@ -10797,23 +10866,23 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV32I-NEXT: or s1, a3, s4
; RV32I-NEXT: lw a3, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: or s2, a3, s6
-; RV32I-NEXT: .LBB20_121:
+; RV32I-NEXT: .LBB20_112:
; RV32I-NEXT: lw ra, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: mv t0, s5
-; RV32I-NEXT: beqz a5, .LBB20_123
-; RV32I-NEXT: # %bb.122:
+; RV32I-NEXT: beqz a5, .LBB20_114
+; RV32I-NEXT: # %bb.113:
; RV32I-NEXT: mv s0, t1
; RV32I-NEXT: mv a0, a7
; RV32I-NEXT: mv t4, s1
; RV32I-NEXT: mv t3, s2
-; RV32I-NEXT: .LBB20_123:
-; RV32I-NEXT: bltu a5, a6, .LBB20_125
-; RV32I-NEXT: # %bb.124:
+; RV32I-NEXT: .LBB20_114:
+; RV32I-NEXT: bltu a5, a6, .LBB20_116
+; RV32I-NEXT: # %bb.115:
; RV32I-NEXT: srai a1, a4, 31
; RV32I-NEXT: mv t0, a1
; RV32I-NEXT: mv s11, a1
; RV32I-NEXT: mv ra, a1
-; RV32I-NEXT: .LBB20_125:
+; RV32I-NEXT: .LBB20_116:
; RV32I-NEXT: srli a4, s0, 16
; RV32I-NEXT: lui t1, 16
; RV32I-NEXT: srli a7, s0, 24
@@ -10895,15 +10964,15 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 96
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB20_126:
+; RV32I-NEXT: .LBB20_117:
; RV32I-NEXT: mv t1, t2
; RV32I-NEXT: mv a7, t5
-; RV32I-NEXT: bltu ra, t0, .LBB20_119
-; RV32I-NEXT: .LBB20_127:
+; RV32I-NEXT: bltu ra, t0, .LBB20_110
+; RV32I-NEXT: .LBB20_118:
; RV32I-NEXT: srai s1, a4, 31
; RV32I-NEXT: mv s2, s1
-; RV32I-NEXT: bltu a5, a6, .LBB20_120
-; RV32I-NEXT: j .LBB20_121
+; RV32I-NEXT: bltu a5, a6, .LBB20_111
+; RV32I-NEXT: j .LBB20_112
%src = load i256, ptr %src.ptr, align 1
%dwordOff = load i256, ptr %dwordOff.ptr, align 1
%bitOff = shl i256 %dwordOff, 6
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll b/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll
index d3c6a1322ebbe..8534ad379ebab 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll
@@ -208,26 +208,28 @@ define void @amomax_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: call __atomic_compare_exchange_8
; RV32-NEXT: lw a4, 8(sp)
; RV32-NEXT: lw a5, 12(sp)
-; RV32-NEXT: bnez a0, .LBB11_7
+; RV32-NEXT: bnez a0, .LBB11_6
; RV32-NEXT: .LBB11_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beq a5, s0, .LBB11_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB11_2 Depth=1
; RV32-NEXT: slt a0, s0, a5
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: mv a3, a5
+; RV32-NEXT: bnez a0, .LBB11_1
; RV32-NEXT: j .LBB11_5
; RV32-NEXT: .LBB11_4: # in Loop: Header=BB11_2 Depth=1
; RV32-NEXT: sltu a0, s2, a4
-; RV32-NEXT: .LBB11_5: # in Loop: Header=BB11_2 Depth=1
; RV32-NEXT: mv a2, a4
; RV32-NEXT: mv a3, a5
; RV32-NEXT: bnez a0, .LBB11_1
-; RV32-NEXT: # %bb.6: # %atomicrmw.start
+; RV32-NEXT: .LBB11_5: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB11_2 Depth=1
; RV32-NEXT: mv a2, s2
; RV32-NEXT: mv a3, s0
; RV32-NEXT: j .LBB11_1
-; RV32-NEXT: .LBB11_7: # %atomicrmw.end
+; RV32-NEXT: .LBB11_6: # %atomicrmw.end
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -282,26 +284,28 @@ define void @amomaxu_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: call __atomic_compare_exchange_8
; RV32-NEXT: lw a4, 8(sp)
; RV32-NEXT: lw a5, 12(sp)
-; RV32-NEXT: bnez a0, .LBB13_7
+; RV32-NEXT: bnez a0, .LBB13_6
; RV32-NEXT: .LBB13_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beq a5, s0, .LBB13_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB13_2 Depth=1
; RV32-NEXT: sltu a0, s0, a5
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: mv a3, a5
+; RV32-NEXT: bnez a0, .LBB13_1
; RV32-NEXT: j .LBB13_5
; RV32-NEXT: .LBB13_4: # in Loop: Header=BB13_2 Depth=1
; RV32-NEXT: sltu a0, s2, a4
-; RV32-NEXT: .LBB13_5: # in Loop: Header=BB13_2 Depth=1
; RV32-NEXT: mv a2, a4
; RV32-NEXT: mv a3, a5
; RV32-NEXT: bnez a0, .LBB13_1
-; RV32-NEXT: # %bb.6: # %atomicrmw.start
+; RV32-NEXT: .LBB13_5: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB13_2 Depth=1
; RV32-NEXT: mv a2, s2
; RV32-NEXT: mv a3, s0
; RV32-NEXT: j .LBB13_1
-; RV32-NEXT: .LBB13_7: # %atomicrmw.end
+; RV32-NEXT: .LBB13_6: # %atomicrmw.end
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -356,26 +360,28 @@ define void @amomin_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: call __atomic_compare_exchange_8
; RV32-NEXT: lw a4, 8(sp)
; RV32-NEXT: lw a5, 12(sp)
-; RV32-NEXT: bnez a0, .LBB15_7
+; RV32-NEXT: bnez a0, .LBB15_6
; RV32-NEXT: .LBB15_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beq a5, s0, .LBB15_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB15_2 Depth=1
; RV32-NEXT: slt a0, s0, a5
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: mv a3, a5
+; RV32-NEXT: beqz a0, .LBB15_1
; RV32-NEXT: j .LBB15_5
; RV32-NEXT: .LBB15_4: # in Loop: Header=BB15_2 Depth=1
; RV32-NEXT: sltu a0, s2, a4
-; RV32-NEXT: .LBB15_5: # in Loop: Header=BB15_2 Depth=1
; RV32-NEXT: mv a2, a4
; RV32-NEXT: mv a3, a5
; RV32-NEXT: beqz a0, .LBB15_1
-; RV32-NEXT: # %bb.6: # %atomicrmw.start
+; RV32-NEXT: .LBB15_5: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB15_2 Depth=1
; RV32-NEXT: mv a2, s2
; RV32-NEXT: mv a3, s0
; RV32-NEXT: j .LBB15_1
-; RV32-NEXT: .LBB15_7: # %atomicrmw.end
+; RV32-NEXT: .LBB15_6: # %atomicrmw.end
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -430,26 +436,28 @@ define void @amominu_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: call __atomic_compare_exchange_8
; RV32-NEXT: lw a4, 8(sp)
; RV32-NEXT: lw a5, 12(sp)
-; RV32-NEXT: bnez a0, .LBB17_7
+; RV32-NEXT: bnez a0, .LBB17_6
; RV32-NEXT: .LBB17_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beq a5, s0, .LBB17_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB17_2 Depth=1
; RV32-NEXT: sltu a0, s0, a5
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: mv a3, a5
+; RV32-NEXT: beqz a0, .LBB17_1
; RV32-NEXT: j .LBB17_5
; RV32-NEXT: .LBB17_4: # in Loop: Header=BB17_2 Depth=1
; RV32-NEXT: sltu a0, s2, a4
-; RV32-NEXT: .LBB17_5: # in Loop: Header=BB17_2 Depth=1
; RV32-NEXT: mv a2, a4
; RV32-NEXT: mv a3, a5
; RV32-NEXT: beqz a0, .LBB17_1
-; RV32-NEXT: # %bb.6: # %atomicrmw.start
+; RV32-NEXT: .LBB17_5: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB17_2 Depth=1
; RV32-NEXT: mv a2, s2
; RV32-NEXT: mv a3, s0
; RV32-NEXT: j .LBB17_1
-; RV32-NEXT: .LBB17_7: # %atomicrmw.end
+; RV32-NEXT: .LBB17_6: # %atomicrmw.end
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/copyprop.ll b/llvm/test/CodeGen/RISCV/copyprop.ll
index b87f8baa77964..ddf58403382c5 100644
--- a/llvm/test/CodeGen/RISCV/copyprop.ll
+++ b/llvm/test/CodeGen/RISCV/copyprop.ll
@@ -7,21 +7,23 @@ define void @copyprop_after_mbp(i32 %v, ptr %a, ptr %b, ptr %c, ptr %d) {
; NOPROP: # %bb.0:
; NOPROP-NEXT: sext.w a0, a0
; NOPROP-NEXT: li a5, 10
-; NOPROP-NEXT: bne a0, a5, .LBB0_3
+; NOPROP-NEXT: bne a0, a5, .LBB0_2
; NOPROP-NEXT: # %bb.1: # %bb.0
; NOPROP-NEXT: li a0, 15
; NOPROP-NEXT: sw a0, 0(a2)
; NOPROP-NEXT: li a0, 1
-; NOPROP-NEXT: .LBB0_2: # %bb.0
; NOPROP-NEXT: sw a0, 0(a1)
; NOPROP-NEXT: li a0, 12
; NOPROP-NEXT: sw a0, 0(a4)
; NOPROP-NEXT: ret
-; NOPROP-NEXT: .LBB0_3: # %bb.1
+; NOPROP-NEXT: .LBB0_2: # %bb.1
; NOPROP-NEXT: li a0, 0
; NOPROP-NEXT: li a2, 25
; NOPROP-NEXT: sw a2, 0(a3)
-; NOPROP-NEXT: j .LBB0_2
+; NOPROP-NEXT: sw a0, 0(a1)
+; NOPROP-NEXT: li a0, 12
+; NOPROP-NEXT: sw a0, 0(a4)
+; NOPROP-NEXT: ret
;
; PROP-LABEL: copyprop_after_mbp:
; PROP: # %bb.0:
@@ -33,12 +35,13 @@ define void @copyprop_after_mbp(i32 %v, ptr %a, ptr %b, ptr %c, ptr %d) {
; PROP-NEXT: sw a0, 0(a2)
; PROP-NEXT: li a0, 1
; PROP-NEXT: sw a0, 0(a1)
-; PROP-NEXT: j .LBB0_3
+; PROP-NEXT: li a0, 12
+; PROP-NEXT: sw a0, 0(a4)
+; PROP-NEXT: ret
; PROP-NEXT: .LBB0_2: # %bb.1
; PROP-NEXT: li a2, 25
; PROP-NEXT: sw a2, 0(a3)
; PROP-NEXT: sw zero, 0(a1)
-; PROP-NEXT: .LBB0_3: # %bb.1
; PROP-NEXT: li a0, 12
; PROP-NEXT: sw a0, 0(a4)
; PROP-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/double-maximum-minimum.ll b/llvm/test/CodeGen/RISCV/double-maximum-minimum.ll
index d3d7dea5ea2be..4ee01cc48b9f0 100644
--- a/llvm/test/CodeGen/RISCV/double-maximum-minimum.ll
+++ b/llvm/test/CodeGen/RISCV/double-maximum-minimum.ll
@@ -20,15 +20,16 @@ define double @fminimum_f64(double %a, double %b) nounwind {
; CHECKIFD-NEXT: feq.d a0, fa0, fa0
; CHECKIFD-NEXT: fmv.d fa5, fa1
; CHECKIFD-NEXT: beqz a0, .LBB0_3
-; CHECKIFD-NEXT: .LBB0_1:
+; CHECKIFD-NEXT: # %bb.1:
; CHECKIFD-NEXT: feq.d a0, fa1, fa1
; CHECKIFD-NEXT: beqz a0, .LBB0_4
-; CHECKIFD-NEXT: # %bb.2:
+; CHECKIFD-NEXT: .LBB0_2:
; CHECKIFD-NEXT: fmin.d fa0, fa0, fa5
; CHECKIFD-NEXT: ret
; CHECKIFD-NEXT: .LBB0_3:
; CHECKIFD-NEXT: fmv.d fa5, fa0
-; CHECKIFD-NEXT: j .LBB0_1
+; CHECKIFD-NEXT: feq.d a0, fa1, fa1
+; CHECKIFD-NEXT: bnez a0, .LBB0_2
; CHECKIFD-NEXT: .LBB0_4:
; CHECKIFD-NEXT: fmin.d fa0, fa1, fa5
; CHECKIFD-NEXT: ret
@@ -39,16 +40,17 @@ define double @fminimum_f64(double %a, double %b) nounwind {
; RV32IZFINXZDINX-NEXT: mv a4, a2
; RV32IZFINXZDINX-NEXT: mv a5, a3
; RV32IZFINXZDINX-NEXT: beqz a6, .LBB0_3
-; RV32IZFINXZDINX-NEXT: .LBB0_1:
+; RV32IZFINXZDINX-NEXT: # %bb.1:
; RV32IZFINXZDINX-NEXT: feq.d a6, a2, a2
; RV32IZFINXZDINX-NEXT: beqz a6, .LBB0_4
-; RV32IZFINXZDINX-NEXT: # %bb.2:
+; RV32IZFINXZDINX-NEXT: .LBB0_2:
; RV32IZFINXZDINX-NEXT: fmin.d a0, a0, a4
; RV32IZFINXZDINX-NEXT: ret
; RV32IZFINXZDINX-NEXT: .LBB0_3:
; RV32IZFINXZDINX-NEXT: mv a4, a0
; RV32IZFINXZDINX-NEXT: mv a5, a1
-; RV32IZFINXZDINX-NEXT: j .LBB0_1
+; RV32IZFINXZDINX-NEXT: feq.d a6, a2, a2
+; RV32IZFINXZDINX-NEXT: bnez a6, .LBB0_2
; RV32IZFINXZDINX-NEXT: .LBB0_4:
; RV32IZFINXZDINX-NEXT: mv a0, a2
; RV32IZFINXZDINX-NEXT: mv a1, a3
@@ -60,15 +62,16 @@ define double @fminimum_f64(double %a, double %b) nounwind {
; RV64IZFINXZDINX-NEXT: feq.d a3, a0, a0
; RV64IZFINXZDINX-NEXT: mv a2, a1
; RV64IZFINXZDINX-NEXT: beqz a3, .LBB0_3
-; RV64IZFINXZDINX-NEXT: .LBB0_1:
+; RV64IZFINXZDINX-NEXT: # %bb.1:
; RV64IZFINXZDINX-NEXT: feq.d a3, a1, a1
; RV64IZFINXZDINX-NEXT: beqz a3, .LBB0_4
-; RV64IZFINXZDINX-NEXT: # %bb.2:
+; RV64IZFINXZDINX-NEXT: .LBB0_2:
; RV64IZFINXZDINX-NEXT: fmin.d a0, a0, a2
; RV64IZFINXZDINX-NEXT: ret
; RV64IZFINXZDINX-NEXT: .LBB0_3:
; RV64IZFINXZDINX-NEXT: mv a2, a0
-; RV64IZFINXZDINX-NEXT: j .LBB0_1
+; RV64IZFINXZDINX-NEXT: feq.d a3, a1, a1
+; RV64IZFINXZDINX-NEXT: bnez a3, .LBB0_2
; RV64IZFINXZDINX-NEXT: .LBB0_4:
; RV64IZFINXZDINX-NEXT: fmin.d a0, a1, a2
; RV64IZFINXZDINX-NEXT: ret
@@ -84,15 +87,16 @@ define double @fmaximum_f64(double %a, double %b) nounwind {
; CHECKIFD-NEXT: feq.d a0, fa0, fa0
; CHECKIFD-NEXT: fmv.d fa5, fa1
; CHECKIFD-NEXT: beqz a0, .LBB1_3
-; CHECKIFD-NEXT: .LBB1_1:
+; CHECKIFD-NEXT: # %bb.1:
; CHECKIFD-NEXT: feq.d a0, fa1, fa1
; CHECKIFD-NEXT: beqz a0, .LBB1_4
-; CHECKIFD-NEXT: # %bb.2:
+; CHECKIFD-NEXT: .LBB1_2:
; CHECKIFD-NEXT: fmax.d fa0, fa0, fa5
; CHECKIFD-NEXT: ret
; CHECKIFD-NEXT: .LBB1_3:
; CHECKIFD-NEXT: fmv.d fa5, fa0
-; CHECKIFD-NEXT: j .LBB1_1
+; CHECKIFD-NEXT: feq.d a0, fa1, fa1
+; CHECKIFD-NEXT: bnez a0, .LBB1_2
; CHECKIFD-NEXT: .LBB1_4:
; CHECKIFD-NEXT: fmax.d fa0, fa1, fa5
; CHECKIFD-NEXT: ret
@@ -103,16 +107,17 @@ define double @fmaximum_f64(double %a, double %b) nounwind {
; RV32IZFINXZDINX-NEXT: mv a4, a2
; RV32IZFINXZDINX-NEXT: mv a5, a3
; RV32IZFINXZDINX-NEXT: beqz a6, .LBB1_3
-; RV32IZFINXZDINX-NEXT: .LBB1_1:
+; RV32IZFINXZDINX-NEXT: # %bb.1:
; RV32IZFINXZDINX-NEXT: feq.d a6, a2, a2
; RV32IZFINXZDINX-NEXT: beqz a6, .LBB1_4
-; RV32IZFINXZDINX-NEXT: # %bb.2:
+; RV32IZFINXZDINX-NEXT: .LBB1_2:
; RV32IZFINXZDINX-NEXT: fmax.d a0, a0, a4
; RV32IZFINXZDINX-NEXT: ret
; RV32IZFINXZDINX-NEXT: .LBB1_3:
; RV32IZFINXZDINX-NEXT: mv a4, a0
; RV32IZFINXZDINX-NEXT: mv a5, a1
-; RV32IZFINXZDINX-NEXT: j .LBB1_1
+; RV32IZFINXZDINX-NEXT: feq.d a6, a2, a2
+; RV32IZFINXZDINX-NEXT: bnez a6, .LBB1_2
; RV32IZFINXZDINX-NEXT: .LBB1_4:
; RV32IZFINXZDINX-NEXT: mv a0, a2
; RV32IZFINXZDINX-NEXT: mv a1, a3
@@ -124,15 +129,16 @@ define double @fmaximum_f64(double %a, double %b) nounwind {
; RV64IZFINXZDINX-NEXT: feq.d a3, a0, a0
; RV64IZFINXZDINX-NEXT: mv a2, a1
; RV64IZFINXZDINX-NEXT: beqz a3, .LBB1_3
-; RV64IZFINXZDINX-NEXT: .LBB1_1:
+; RV64IZFINXZDINX-NEXT: # %bb.1:
; RV64IZFINXZDINX-NEXT: feq.d a3, a1, a1
; RV64IZFINXZDINX-NEXT: beqz a3, .LBB1_4
-; RV64IZFINXZDINX-NEXT: # %bb.2:
+; RV64IZFINXZDINX-NEXT: .LBB1_2:
; RV64IZFINXZDINX-NEXT: fmax.d a0, a0, a2
; RV64IZFINXZDINX-NEXT: ret
; RV64IZFINXZDINX-NEXT: .LBB1_3:
; RV64IZFINXZDINX-NEXT: mv a2, a0
-; RV64IZFINXZDINX-NEXT: j .LBB1_1
+; RV64IZFINXZDINX-NEXT: feq.d a3, a1, a1
+; RV64IZFINXZDINX-NEXT: bnez a3, .LBB1_2
; RV64IZFINXZDINX-NEXT: .LBB1_4:
; RV64IZFINXZDINX-NEXT: fmax.d a0, a1, a2
; RV64IZFINXZDINX-NEXT: ret
@@ -165,15 +171,16 @@ define double @fmaximum_nnan_f64(double %a, double %b) nounwind {
; CHECKIFD-NEXT: feq.d a0, fa0, fa0
; CHECKIFD-NEXT: fmv.d fa5, fa1
; CHECKIFD-NEXT: beqz a0, .LBB3_3
-; CHECKIFD-NEXT: .LBB3_1:
+; CHECKIFD-NEXT: # %bb.1:
; CHECKIFD-NEXT: feq.d a0, fa1, fa1
; CHECKIFD-NEXT: beqz a0, .LBB3_4
-; CHECKIFD-NEXT: # %bb.2:
+; CHECKIFD-NEXT: .LBB3_2:
; CHECKIFD-NEXT: fmin.d fa0, fa0, fa5
; CHECKIFD-NEXT: ret
; CHECKIFD-NEXT: .LBB3_3:
; CHECKIFD-NEXT: fmv.d fa5, fa0
-; CHECKIFD-NEXT: j .LBB3_1
+; CHECKIFD-NEXT: feq.d a0, fa1, fa1
+; CHECKIFD-NEXT: bnez a0, .LBB3_2
; CHECKIFD-NEXT: .LBB3_4:
; CHECKIFD-NEXT: fmin.d fa0, fa1, fa5
; CHECKIFD-NEXT: ret
@@ -184,16 +191,17 @@ define double @fmaximum_nnan_f64(double %a, double %b) nounwind {
; RV32IZFINXZDINX-NEXT: mv a4, a2
; RV32IZFINXZDINX-NEXT: mv a5, a3
; RV32IZFINXZDINX-NEXT: beqz a6, .LBB3_3
-; RV32IZFINXZDINX-NEXT: .LBB3_1:
+; RV32IZFINXZDINX-NEXT: # %bb.1:
; RV32IZFINXZDINX-NEXT: feq.d a6, a2, a2
; RV32IZFINXZDINX-NEXT: beqz a6, .LBB3_4
-; RV32IZFINXZDINX-NEXT: # %bb.2:
+; RV32IZFINXZDINX-NEXT: .LBB3_2:
; RV32IZFINXZDINX-NEXT: fmin.d a0, a0, a4
; RV32IZFINXZDINX-NEXT: ret
; RV32IZFINXZDINX-NEXT: .LBB3_3:
; RV32IZFINXZDINX-NEXT: mv a4, a0
; RV32IZFINXZDINX-NEXT: mv a5, a1
-; RV32IZFINXZDINX-NEXT: j .LBB3_1
+; RV32IZFINXZDINX-NEXT: feq.d a6, a2, a2
+; RV32IZFINXZDINX-NEXT: bnez a6, .LBB3_2
; RV32IZFINXZDINX-NEXT: .LBB3_4:
; RV32IZFINXZDINX-NEXT: mv a0, a2
; RV32IZFINXZDINX-NEXT: mv a1, a3
@@ -205,15 +213,16 @@ define double @fmaximum_nnan_f64(double %a, double %b) nounwind {
; RV64IZFINXZDINX-NEXT: feq.d a3, a0, a0
; RV64IZFINXZDINX-NEXT: mv a2, a1
; RV64IZFINXZDINX-NEXT: beqz a3, .LBB3_3
-; RV64IZFINXZDINX-NEXT: .LBB3_1:
+; RV64IZFINXZDINX-NEXT: # %bb.1:
; RV64IZFINXZDINX-NEXT: feq.d a3, a1, a1
; RV64IZFINXZDINX-NEXT: beqz a3, .LBB3_4
-; RV64IZFINXZDINX-NEXT: # %bb.2:
+; RV64IZFINXZDINX-NEXT: .LBB3_2:
; RV64IZFINXZDINX-NEXT: fmin.d a0, a0, a2
; RV64IZFINXZDINX-NEXT: ret
; RV64IZFINXZDINX-NEXT: .LBB3_3:
; RV64IZFINXZDINX-NEXT: mv a2, a0
-; RV64IZFINXZDINX-NEXT: j .LBB3_1
+; RV64IZFINXZDINX-NEXT: feq.d a3, a1, a1
+; RV64IZFINXZDINX-NEXT: bnez a3, .LBB3_2
; RV64IZFINXZDINX-NEXT: .LBB3_4:
; RV64IZFINXZDINX-NEXT: fmin.d a0, a1, a2
; RV64IZFINXZDINX-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/float-maximum-minimum.ll b/llvm/test/CodeGen/RISCV/float-maximum-minimum.ll
index f9320fd14ff08..2e9f8cbf6d2ef 100644
--- a/llvm/test/CodeGen/RISCV/float-maximum-minimum.ll
+++ b/llvm/test/CodeGen/RISCV/float-maximum-minimum.ll
@@ -32,15 +32,16 @@ define float @fminimum_f32(float %a, float %b) nounwind {
; RV32IF-NEXT: feq.s a0, fa0, fa0
; RV32IF-NEXT: fmv.s fa5, fa1
; RV32IF-NEXT: beqz a0, .LBB0_3
-; RV32IF-NEXT: .LBB0_1:
+; RV32IF-NEXT: # %bb.1:
; RV32IF-NEXT: feq.s a0, fa1, fa1
; RV32IF-NEXT: beqz a0, .LBB0_4
-; RV32IF-NEXT: # %bb.2:
+; RV32IF-NEXT: .LBB0_2:
; RV32IF-NEXT: fmin.s fa0, fa0, fa5
; RV32IF-NEXT: ret
; RV32IF-NEXT: .LBB0_3:
; RV32IF-NEXT: fmv.s fa5, fa0
-; RV32IF-NEXT: j .LBB0_1
+; RV32IF-NEXT: feq.s a0, fa1, fa1
+; RV32IF-NEXT: bnez a0, .LBB0_2
; RV32IF-NEXT: .LBB0_4:
; RV32IF-NEXT: fmin.s fa0, fa1, fa5
; RV32IF-NEXT: ret
@@ -50,15 +51,16 @@ define float @fminimum_f32(float %a, float %b) nounwind {
; RV32IZFINX-NEXT: feq.s a3, a0, a0
; RV32IZFINX-NEXT: mv a2, a1
; RV32IZFINX-NEXT: beqz a3, .LBB0_3
-; RV32IZFINX-NEXT: .LBB0_1:
+; RV32IZFINX-NEXT: # %bb.1:
; RV32IZFINX-NEXT: feq.s a3, a1, a1
; RV32IZFINX-NEXT: beqz a3, .LBB0_4
-; RV32IZFINX-NEXT: # %bb.2:
+; RV32IZFINX-NEXT: .LBB0_2:
; RV32IZFINX-NEXT: fmin.s a0, a0, a2
; RV32IZFINX-NEXT: ret
; RV32IZFINX-NEXT: .LBB0_3:
; RV32IZFINX-NEXT: mv a2, a0
-; RV32IZFINX-NEXT: j .LBB0_1
+; RV32IZFINX-NEXT: feq.s a3, a1, a1
+; RV32IZFINX-NEXT: bnez a3, .LBB0_2
; RV32IZFINX-NEXT: .LBB0_4:
; RV32IZFINX-NEXT: fmin.s a0, a1, a2
; RV32IZFINX-NEXT: ret
@@ -77,15 +79,16 @@ define float @fminimum_f32(float %a, float %b) nounwind {
; RV64IF-NEXT: feq.s a0, fa0, fa0
; RV64IF-NEXT: fmv.s fa5, fa1
; RV64IF-NEXT: beqz a0, .LBB0_3
-; RV64IF-NEXT: .LBB0_1:
+; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: feq.s a0, fa1, fa1
; RV64IF-NEXT: beqz a0, .LBB0_4
-; RV64IF-NEXT: # %bb.2:
+; RV64IF-NEXT: .LBB0_2:
; RV64IF-NEXT: fmin.s fa0, fa0, fa5
; RV64IF-NEXT: ret
; RV64IF-NEXT: .LBB0_3:
; RV64IF-NEXT: fmv.s fa5, fa0
-; RV64IF-NEXT: j .LBB0_1
+; RV64IF-NEXT: feq.s a0, fa1, fa1
+; RV64IF-NEXT: bnez a0, .LBB0_2
; RV64IF-NEXT: .LBB0_4:
; RV64IF-NEXT: fmin.s fa0, fa1, fa5
; RV64IF-NEXT: ret
@@ -95,15 +98,16 @@ define float @fminimum_f32(float %a, float %b) nounwind {
; RV64IZFINX-NEXT: feq.s a3, a0, a0
; RV64IZFINX-NEXT: mv a2, a1
; RV64IZFINX-NEXT: beqz a3, .LBB0_3
-; RV64IZFINX-NEXT: .LBB0_1:
+; RV64IZFINX-NEXT: # %bb.1:
; RV64IZFINX-NEXT: feq.s a3, a1, a1
; RV64IZFINX-NEXT: beqz a3, .LBB0_4
-; RV64IZFINX-NEXT: # %bb.2:
+; RV64IZFINX-NEXT: .LBB0_2:
; RV64IZFINX-NEXT: fmin.s a0, a0, a2
; RV64IZFINX-NEXT: ret
; RV64IZFINX-NEXT: .LBB0_3:
; RV64IZFINX-NEXT: mv a2, a0
-; RV64IZFINX-NEXT: j .LBB0_1
+; RV64IZFINX-NEXT: feq.s a3, a1, a1
+; RV64IZFINX-NEXT: bnez a3, .LBB0_2
; RV64IZFINX-NEXT: .LBB0_4:
; RV64IZFINX-NEXT: fmin.s a0, a1, a2
; RV64IZFINX-NEXT: ret
@@ -128,15 +132,16 @@ define float @fmaximum_f32(float %a, float %b) nounwind {
; RV32IF-NEXT: feq.s a0, fa0, fa0
; RV32IF-NEXT: fmv.s fa5, fa1
; RV32IF-NEXT: beqz a0, .LBB1_3
-; RV32IF-NEXT: .LBB1_1:
+; RV32IF-NEXT: # %bb.1:
; RV32IF-NEXT: feq.s a0, fa1, fa1
; RV32IF-NEXT: beqz a0, .LBB1_4
-; RV32IF-NEXT: # %bb.2:
+; RV32IF-NEXT: .LBB1_2:
; RV32IF-NEXT: fmax.s fa0, fa0, fa5
; RV32IF-NEXT: ret
; RV32IF-NEXT: .LBB1_3:
; RV32IF-NEXT: fmv.s fa5, fa0
-; RV32IF-NEXT: j .LBB1_1
+; RV32IF-NEXT: feq.s a0, fa1, fa1
+; RV32IF-NEXT: bnez a0, .LBB1_2
; RV32IF-NEXT: .LBB1_4:
; RV32IF-NEXT: fmax.s fa0, fa1, fa5
; RV32IF-NEXT: ret
@@ -146,15 +151,16 @@ define float @fmaximum_f32(float %a, float %b) nounwind {
; RV32IZFINX-NEXT: feq.s a3, a0, a0
; RV32IZFINX-NEXT: mv a2, a1
; RV32IZFINX-NEXT: beqz a3, .LBB1_3
-; RV32IZFINX-NEXT: .LBB1_1:
+; RV32IZFINX-NEXT: # %bb.1:
; RV32IZFINX-NEXT: feq.s a3, a1, a1
; RV32IZFINX-NEXT: beqz a3, .LBB1_4
-; RV32IZFINX-NEXT: # %bb.2:
+; RV32IZFINX-NEXT: .LBB1_2:
; RV32IZFINX-NEXT: fmax.s a0, a0, a2
; RV32IZFINX-NEXT: ret
; RV32IZFINX-NEXT: .LBB1_3:
; RV32IZFINX-NEXT: mv a2, a0
-; RV32IZFINX-NEXT: j .LBB1_1
+; RV32IZFINX-NEXT: feq.s a3, a1, a1
+; RV32IZFINX-NEXT: bnez a3, .LBB1_2
; RV32IZFINX-NEXT: .LBB1_4:
; RV32IZFINX-NEXT: fmax.s a0, a1, a2
; RV32IZFINX-NEXT: ret
@@ -173,15 +179,16 @@ define float @fmaximum_f32(float %a, float %b) nounwind {
; RV64IF-NEXT: feq.s a0, fa0, fa0
; RV64IF-NEXT: fmv.s fa5, fa1
; RV64IF-NEXT: beqz a0, .LBB1_3
-; RV64IF-NEXT: .LBB1_1:
+; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: feq.s a0, fa1, fa1
; RV64IF-NEXT: beqz a0, .LBB1_4
-; RV64IF-NEXT: # %bb.2:
+; RV64IF-NEXT: .LBB1_2:
; RV64IF-NEXT: fmax.s fa0, fa0, fa5
; RV64IF-NEXT: ret
; RV64IF-NEXT: .LBB1_3:
; RV64IF-NEXT: fmv.s fa5, fa0
-; RV64IF-NEXT: j .LBB1_1
+; RV64IF-NEXT: feq.s a0, fa1, fa1
+; RV64IF-NEXT: bnez a0, .LBB1_2
; RV64IF-NEXT: .LBB1_4:
; RV64IF-NEXT: fmax.s fa0, fa1, fa5
; RV64IF-NEXT: ret
@@ -191,15 +198,16 @@ define float @fmaximum_f32(float %a, float %b) nounwind {
; RV64IZFINX-NEXT: feq.s a3, a0, a0
; RV64IZFINX-NEXT: mv a2, a1
; RV64IZFINX-NEXT: beqz a3, .LBB1_3
-; RV64IZFINX-NEXT: .LBB1_1:
+; RV64IZFINX-NEXT: # %bb.1:
; RV64IZFINX-NEXT: feq.s a3, a1, a1
; RV64IZFINX-NEXT: beqz a3, .LBB1_4
-; RV64IZFINX-NEXT: # %bb.2:
+; RV64IZFINX-NEXT: .LBB1_2:
; RV64IZFINX-NEXT: fmax.s a0, a0, a2
; RV64IZFINX-NEXT: ret
; RV64IZFINX-NEXT: .LBB1_3:
; RV64IZFINX-NEXT: mv a2, a0
-; RV64IZFINX-NEXT: j .LBB1_1
+; RV64IZFINX-NEXT: feq.s a3, a1, a1
+; RV64IZFINX-NEXT: bnez a3, .LBB1_2
; RV64IZFINX-NEXT: .LBB1_4:
; RV64IZFINX-NEXT: fmax.s a0, a1, a2
; RV64IZFINX-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll
index f105708e4e77b..e7719dc70660b 100644
--- a/llvm/test/CodeGen/RISCV/forced-atomics.ll
+++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll
@@ -3367,15 +3367,19 @@ define i64 @rmw64_max_seq_cst(ptr %p) nounwind {
; RV32-NEXT: bnez a0, .LBB49_6
; RV32-NEXT: .LBB49_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32-NEXT: beqz a1, .LBB49_7
+; RV32-NEXT: beqz a1, .LBB49_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1
; RV32-NEXT: sgtz a0, a1
-; RV32-NEXT: .LBB49_4: # %atomicrmw.start
-; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1
; RV32-NEXT: mv a2, a4
; RV32-NEXT: bnez a0, .LBB49_1
-; RV32-NEXT: # %bb.5: # %atomicrmw.start
+; RV32-NEXT: j .LBB49_5
+; RV32-NEXT: .LBB49_4: # in Loop: Header=BB49_2 Depth=1
+; RV32-NEXT: sltiu a0, a4, 2
+; RV32-NEXT: xori a0, a0, 1
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: bnez a0, .LBB49_1
+; RV32-NEXT: .LBB49_5: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1
; RV32-NEXT: li a2, 1
; RV32-NEXT: j .LBB49_1
@@ -3385,10 +3389,6 @@ define i64 @rmw64_max_seq_cst(ptr %p) nounwind {
; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
-; RV32-NEXT: .LBB49_7: # in Loop: Header=BB49_2 Depth=1
-; RV32-NEXT: sltiu a0, a4, 2
-; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: j .LBB49_4
;
; RV64-NO-ATOMIC-LABEL: rmw64_max_seq_cst:
; RV64-NO-ATOMIC: # %bb.0:
@@ -3469,24 +3469,25 @@ define i64 @rmw64_min_seq_cst(ptr %p) nounwind {
; RV32-NEXT: call __atomic_compare_exchange_8
; RV32-NEXT: lw a4, 0(sp)
; RV32-NEXT: lw a1, 4(sp)
-; RV32-NEXT: bnez a0, .LBB50_7
+; RV32-NEXT: bnez a0, .LBB50_6
; RV32-NEXT: .LBB50_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beqz a1, .LBB50_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1
; RV32-NEXT: slti a0, a1, 0
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: bnez a0, .LBB50_1
; RV32-NEXT: j .LBB50_5
; RV32-NEXT: .LBB50_4: # in Loop: Header=BB50_2 Depth=1
; RV32-NEXT: sltiu a0, a4, 2
-; RV32-NEXT: .LBB50_5: # in Loop: Header=BB50_2 Depth=1
; RV32-NEXT: mv a2, a4
; RV32-NEXT: bnez a0, .LBB50_1
-; RV32-NEXT: # %bb.6: # %atomicrmw.start
+; RV32-NEXT: .LBB50_5: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1
; RV32-NEXT: li a2, 1
; RV32-NEXT: j .LBB50_1
-; RV32-NEXT: .LBB50_7: # %atomicrmw.end
+; RV32-NEXT: .LBB50_6: # %atomicrmw.end
; RV32-NEXT: mv a0, a4
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index 6ee3bd5349ce7..c5c3b199447a9 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -607,14 +607,15 @@ define i16 @stest_f64i16(double %x) {
; RV32IFD-NEXT: lui a1, 8
; RV32IFD-NEXT: addi a1, a1, -1
; RV32IFD-NEXT: bge a0, a1, .LBB9_3
-; RV32IFD-NEXT: .LBB9_1: # %entry
+; RV32IFD-NEXT: # %bb.1: # %entry
; RV32IFD-NEXT: lui a1, 1048568
; RV32IFD-NEXT: bge a1, a0, .LBB9_4
-; RV32IFD-NEXT: # %bb.2: # %entry
+; RV32IFD-NEXT: .LBB9_2: # %entry
; RV32IFD-NEXT: ret
; RV32IFD-NEXT: .LBB9_3: # %entry
; RV32IFD-NEXT: mv a0, a1
-; RV32IFD-NEXT: j .LBB9_1
+; RV32IFD-NEXT: lui a1, 1048568
+; RV32IFD-NEXT: blt a1, a0, .LBB9_2
; RV32IFD-NEXT: .LBB9_4: # %entry
; RV32IFD-NEXT: lui a0, 1048568
; RV32IFD-NEXT: ret
@@ -625,14 +626,15 @@ define i16 @stest_f64i16(double %x) {
; RV64IFD-NEXT: lui a1, 8
; RV64IFD-NEXT: addiw a1, a1, -1
; RV64IFD-NEXT: bge a0, a1, .LBB9_3
-; RV64IFD-NEXT: .LBB9_1: # %entry
+; RV64IFD-NEXT: # %bb.1: # %entry
; RV64IFD-NEXT: lui a1, 1048568
; RV64IFD-NEXT: bge a1, a0, .LBB9_4
-; RV64IFD-NEXT: # %bb.2: # %entry
+; RV64IFD-NEXT: .LBB9_2: # %entry
; RV64IFD-NEXT: ret
; RV64IFD-NEXT: .LBB9_3: # %entry
; RV64IFD-NEXT: mv a0, a1
-; RV64IFD-NEXT: j .LBB9_1
+; RV64IFD-NEXT: lui a1, 1048568
+; RV64IFD-NEXT: blt a1, a0, .LBB9_2
; RV64IFD-NEXT: .LBB9_4: # %entry
; RV64IFD-NEXT: lui a0, 1048568
; RV64IFD-NEXT: ret
@@ -803,14 +805,15 @@ define i16 @stest_f32i16(float %x) {
; RV32-NEXT: lui a1, 8
; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: bge a0, a1, .LBB12_3
-; RV32-NEXT: .LBB12_1: # %entry
+; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: lui a1, 1048568
; RV32-NEXT: bge a1, a0, .LBB12_4
-; RV32-NEXT: # %bb.2: # %entry
+; RV32-NEXT: .LBB12_2: # %entry
; RV32-NEXT: ret
; RV32-NEXT: .LBB12_3: # %entry
; RV32-NEXT: mv a0, a1
-; RV32-NEXT: j .LBB12_1
+; RV32-NEXT: lui a1, 1048568
+; RV32-NEXT: blt a1, a0, .LBB12_2
; RV32-NEXT: .LBB12_4: # %entry
; RV32-NEXT: lui a0, 1048568
; RV32-NEXT: ret
@@ -821,14 +824,15 @@ define i16 @stest_f32i16(float %x) {
; RV64-NEXT: lui a1, 8
; RV64-NEXT: addiw a1, a1, -1
; RV64-NEXT: bge a0, a1, .LBB12_3
-; RV64-NEXT: .LBB12_1: # %entry
+; RV64-NEXT: # %bb.1: # %entry
; RV64-NEXT: lui a1, 1048568
; RV64-NEXT: bge a1, a0, .LBB12_4
-; RV64-NEXT: # %bb.2: # %entry
+; RV64-NEXT: .LBB12_2: # %entry
; RV64-NEXT: ret
; RV64-NEXT: .LBB12_3: # %entry
; RV64-NEXT: mv a0, a1
-; RV64-NEXT: j .LBB12_1
+; RV64-NEXT: lui a1, 1048568
+; RV64-NEXT: blt a1, a0, .LBB12_2
; RV64-NEXT: .LBB12_4: # %entry
; RV64-NEXT: lui a0, 1048568
; RV64-NEXT: ret
@@ -1096,42 +1100,43 @@ define i64 @stest_f64i64(double %x) {
; RV32IF-NEXT: beq a1, a5, .LBB18_2
; RV32IF-NEXT: # %bb.1: # %entry
; RV32IF-NEXT: sltu a6, a1, a5
-; RV32IF-NEXT: j .LBB18_3
+; RV32IF-NEXT: or a7, a2, a4
+; RV32IF-NEXT: bnez a7, .LBB18_3
+; RV32IF-NEXT: j .LBB18_4
; RV32IF-NEXT: .LBB18_2:
; RV32IF-NEXT: sltiu a6, a3, -1
-; RV32IF-NEXT: .LBB18_3:
; RV32IF-NEXT: or a7, a2, a4
-; RV32IF-NEXT: beqz a7, .LBB18_5
-; RV32IF-NEXT: # %bb.4: # %entry
+; RV32IF-NEXT: beqz a7, .LBB18_4
+; RV32IF-NEXT: .LBB18_3: # %entry
; RV32IF-NEXT: slti a6, a4, 0
-; RV32IF-NEXT: .LBB18_5: # %entry
+; RV32IF-NEXT: .LBB18_4: # %entry
; RV32IF-NEXT: addi a7, a6, -1
; RV32IF-NEXT: neg t0, a6
-; RV32IF-NEXT: bnez a6, .LBB18_7
-; RV32IF-NEXT: # %bb.6: # %entry
+; RV32IF-NEXT: bnez a6, .LBB18_6
+; RV32IF-NEXT: # %bb.5: # %entry
; RV32IF-NEXT: mv a1, a5
-; RV32IF-NEXT: .LBB18_7: # %entry
+; RV32IF-NEXT: .LBB18_6: # %entry
; RV32IF-NEXT: or a3, a7, a3
; RV32IF-NEXT: and a4, t0, a4
; RV32IF-NEXT: and a2, t0, a2
-; RV32IF-NEXT: beq a1, a0, .LBB18_9
-; RV32IF-NEXT: # %bb.8: # %entry
+; RV32IF-NEXT: beq a1, a0, .LBB18_8
+; RV32IF-NEXT: # %bb.7: # %entry
; RV32IF-NEXT: sltu a0, a0, a1
-; RV32IF-NEXT: j .LBB18_10
-; RV32IF-NEXT: .LBB18_9:
+; RV32IF-NEXT: j .LBB18_9
+; RV32IF-NEXT: .LBB18_8:
; RV32IF-NEXT: snez a0, a3
-; RV32IF-NEXT: .LBB18_10: # %entry
+; RV32IF-NEXT: .LBB18_9: # %entry
; RV32IF-NEXT: and a2, a2, a4
; RV32IF-NEXT: li a5, -1
-; RV32IF-NEXT: beq a2, a5, .LBB18_12
-; RV32IF-NEXT: # %bb.11: # %entry
+; RV32IF-NEXT: beq a2, a5, .LBB18_11
+; RV32IF-NEXT: # %bb.10: # %entry
; RV32IF-NEXT: slti a0, a4, 0
; RV32IF-NEXT: xori a0, a0, 1
-; RV32IF-NEXT: .LBB18_12: # %entry
-; RV32IF-NEXT: bnez a0, .LBB18_14
-; RV32IF-NEXT: # %bb.13: # %entry
+; RV32IF-NEXT: .LBB18_11: # %entry
+; RV32IF-NEXT: bnez a0, .LBB18_13
+; RV32IF-NEXT: # %bb.12: # %entry
; RV32IF-NEXT: lui a1, 524288
-; RV32IF-NEXT: .LBB18_14: # %entry
+; RV32IF-NEXT: .LBB18_13: # %entry
; RV32IF-NEXT: neg a0, a0
; RV32IF-NEXT: and a0, a0, a3
; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -1198,42 +1203,43 @@ define i64 @stest_f64i64(double %x) {
; RV32IFD-NEXT: beq a1, a5, .LBB18_2
; RV32IFD-NEXT: # %bb.1: # %entry
; RV32IFD-NEXT: sltu a6, a1, a5
-; RV32IFD-NEXT: j .LBB18_3
+; RV32IFD-NEXT: or a7, a2, a4
+; RV32IFD-NEXT: bnez a7, .LBB18_3
+; RV32IFD-NEXT: j .LBB18_4
; RV32IFD-NEXT: .LBB18_2:
; RV32IFD-NEXT: sltiu a6, a3, -1
-; RV32IFD-NEXT: .LBB18_3:
; RV32IFD-NEXT: or a7, a2, a4
-; RV32IFD-NEXT: beqz a7, .LBB18_5
-; RV32IFD-NEXT: # %bb.4: # %entry
+; RV32IFD-NEXT: beqz a7, .LBB18_4
+; RV32IFD-NEXT: .LBB18_3: # %entry
; RV32IFD-NEXT: slti a6, a4, 0
-; RV32IFD-NEXT: .LBB18_5: # %entry
+; RV32IFD-NEXT: .LBB18_4: # %entry
; RV32IFD-NEXT: addi a7, a6, -1
; RV32IFD-NEXT: neg t0, a6
-; RV32IFD-NEXT: bnez a6, .LBB18_7
-; RV32IFD-NEXT: # %bb.6: # %entry
+; RV32IFD-NEXT: bnez a6, .LBB18_6
+; RV32IFD-NEXT: # %bb.5: # %entry
; RV32IFD-NEXT: mv a1, a5
-; RV32IFD-NEXT: .LBB18_7: # %entry
+; RV32IFD-NEXT: .LBB18_6: # %entry
; RV32IFD-NEXT: or a3, a7, a3
; RV32IFD-NEXT: and a4, t0, a4
; RV32IFD-NEXT: and a2, t0, a2
-; RV32IFD-NEXT: beq a1, a0, .LBB18_9
-; RV32IFD-NEXT: # %bb.8: # %entry
+; RV32IFD-NEXT: beq a1, a0, .LBB18_8
+; RV32IFD-NEXT: # %bb.7: # %entry
; RV32IFD-NEXT: sltu a0, a0, a1
-; RV32IFD-NEXT: j .LBB18_10
-; RV32IFD-NEXT: .LBB18_9:
+; RV32IFD-NEXT: j .LBB18_9
+; RV32IFD-NEXT: .LBB18_8:
; RV32IFD-NEXT: snez a0, a3
-; RV32IFD-NEXT: .LBB18_10: # %entry
+; RV32IFD-NEXT: .LBB18_9: # %entry
; RV32IFD-NEXT: and a2, a2, a4
; RV32IFD-NEXT: li a5, -1
-; RV32IFD-NEXT: beq a2, a5, .LBB18_12
-; RV32IFD-NEXT: # %bb.11: # %entry
+; RV32IFD-NEXT: beq a2, a5, .LBB18_11
+; RV32IFD-NEXT: # %bb.10: # %entry
; RV32IFD-NEXT: slti a0, a4, 0
; RV32IFD-NEXT: xori a0, a0, 1
-; RV32IFD-NEXT: .LBB18_12: # %entry
-; RV32IFD-NEXT: bnez a0, .LBB18_14
-; RV32IFD-NEXT: # %bb.13: # %entry
+; RV32IFD-NEXT: .LBB18_11: # %entry
+; RV32IFD-NEXT: bnez a0, .LBB18_13
+; RV32IFD-NEXT: # %bb.12: # %entry
; RV32IFD-NEXT: lui a1, 524288
-; RV32IFD-NEXT: .LBB18_14: # %entry
+; RV32IFD-NEXT: .LBB18_13: # %entry
; RV32IFD-NEXT: neg a0, a0
; RV32IFD-NEXT: and a0, a0, a3
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -1509,42 +1515,43 @@ define i64 @stest_f32i64(float %x) {
; RV32-NEXT: beq a1, a5, .LBB21_2
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: j .LBB21_3
+; RV32-NEXT: or a7, a2, a4
+; RV32-NEXT: bnez a7, .LBB21_3
+; RV32-NEXT: j .LBB21_4
; RV32-NEXT: .LBB21_2:
; RV32-NEXT: sltiu a6, a3, -1
-; RV32-NEXT: .LBB21_3:
; RV32-NEXT: or a7, a2, a4
-; RV32-NEXT: beqz a7, .LBB21_5
-; RV32-NEXT: # %bb.4: # %entry
+; RV32-NEXT: beqz a7, .LBB21_4
+; RV32-NEXT: .LBB21_3: # %entry
; RV32-NEXT: slti a6, a4, 0
-; RV32-NEXT: .LBB21_5: # %entry
+; RV32-NEXT: .LBB21_4: # %entry
; RV32-NEXT: addi a7, a6, -1
; RV32-NEXT: neg t0, a6
-; RV32-NEXT: bnez a6, .LBB21_7
-; RV32-NEXT: # %bb.6: # %entry
+; RV32-NEXT: bnez a6, .LBB21_6
+; RV32-NEXT: # %bb.5: # %entry
; RV32-NEXT: mv a1, a5
-; RV32-NEXT: .LBB21_7: # %entry
+; RV32-NEXT: .LBB21_6: # %entry
; RV32-NEXT: or a3, a7, a3
; RV32-NEXT: and a4, t0, a4
; RV32-NEXT: and a2, t0, a2
-; RV32-NEXT: beq a1, a0, .LBB21_9
-; RV32-NEXT: # %bb.8: # %entry
+; RV32-NEXT: beq a1, a0, .LBB21_8
+; RV32-NEXT: # %bb.7: # %entry
; RV32-NEXT: sltu a0, a0, a1
-; RV32-NEXT: j .LBB21_10
-; RV32-NEXT: .LBB21_9:
+; RV32-NEXT: j .LBB21_9
+; RV32-NEXT: .LBB21_8:
; RV32-NEXT: snez a0, a3
-; RV32-NEXT: .LBB21_10: # %entry
+; RV32-NEXT: .LBB21_9: # %entry
; RV32-NEXT: and a2, a2, a4
; RV32-NEXT: li a5, -1
-; RV32-NEXT: beq a2, a5, .LBB21_12
-; RV32-NEXT: # %bb.11: # %entry
+; RV32-NEXT: beq a2, a5, .LBB21_11
+; RV32-NEXT: # %bb.10: # %entry
; RV32-NEXT: slti a0, a4, 0
; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: .LBB21_12: # %entry
-; RV32-NEXT: bnez a0, .LBB21_14
-; RV32-NEXT: # %bb.13: # %entry
+; RV32-NEXT: .LBB21_11: # %entry
+; RV32-NEXT: bnez a0, .LBB21_13
+; RV32-NEXT: # %bb.12: # %entry
; RV32-NEXT: lui a1, 524288
-; RV32-NEXT: .LBB21_14: # %entry
+; RV32-NEXT: .LBB21_13: # %entry
; RV32-NEXT: neg a0, a0
; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -1735,42 +1742,43 @@ define i64 @stest_f16i64(half %x) {
; RV32-NEXT: beq a1, a5, .LBB24_2
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: j .LBB24_3
+; RV32-NEXT: or a7, a2, a4
+; RV32-NEXT: bnez a7, .LBB24_3
+; RV32-NEXT: j .LBB24_4
; RV32-NEXT: .LBB24_2:
; RV32-NEXT: sltiu a6, a3, -1
-; RV32-NEXT: .LBB24_3:
; RV32-NEXT: or a7, a2, a4
-; RV32-NEXT: beqz a7, .LBB24_5
-; RV32-NEXT: # %bb.4: # %entry
+; RV32-NEXT: beqz a7, .LBB24_4
+; RV32-NEXT: .LBB24_3: # %entry
; RV32-NEXT: slti a6, a4, 0
-; RV32-NEXT: .LBB24_5: # %entry
+; RV32-NEXT: .LBB24_4: # %entry
; RV32-NEXT: addi a7, a6, -1
; RV32-NEXT: neg t0, a6
-; RV32-NEXT: bnez a6, .LBB24_7
-; RV32-NEXT: # %bb.6: # %entry
+; RV32-NEXT: bnez a6, .LBB24_6
+; RV32-NEXT: # %bb.5: # %entry
; RV32-NEXT: mv a1, a5
-; RV32-NEXT: .LBB24_7: # %entry
+; RV32-NEXT: .LBB24_6: # %entry
; RV32-NEXT: or a3, a7, a3
; RV32-NEXT: and a4, t0, a4
; RV32-NEXT: and a2, t0, a2
-; RV32-NEXT: beq a1, a0, .LBB24_9
-; RV32-NEXT: # %bb.8: # %entry
+; RV32-NEXT: beq a1, a0, .LBB24_8
+; RV32-NEXT: # %bb.7: # %entry
; RV32-NEXT: sltu a0, a0, a1
-; RV32-NEXT: j .LBB24_10
-; RV32-NEXT: .LBB24_9:
+; RV32-NEXT: j .LBB24_9
+; RV32-NEXT: .LBB24_8:
; RV32-NEXT: snez a0, a3
-; RV32-NEXT: .LBB24_10: # %entry
+; RV32-NEXT: .LBB24_9: # %entry
; RV32-NEXT: and a2, a2, a4
; RV32-NEXT: li a5, -1
-; RV32-NEXT: beq a2, a5, .LBB24_12
-; RV32-NEXT: # %bb.11: # %entry
+; RV32-NEXT: beq a2, a5, .LBB24_11
+; RV32-NEXT: # %bb.10: # %entry
; RV32-NEXT: slti a0, a4, 0
; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: .LBB24_12: # %entry
-; RV32-NEXT: bnez a0, .LBB24_14
-; RV32-NEXT: # %bb.13: # %entry
+; RV32-NEXT: .LBB24_11: # %entry
+; RV32-NEXT: bnez a0, .LBB24_13
+; RV32-NEXT: # %bb.12: # %entry
; RV32-NEXT: lui a1, 524288
-; RV32-NEXT: .LBB24_14: # %entry
+; RV32-NEXT: .LBB24_13: # %entry
; RV32-NEXT: neg a0, a0
; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -2550,14 +2558,15 @@ define i16 @stest_f64i16_mm(double %x) {
; RV32IFD-NEXT: lui a1, 8
; RV32IFD-NEXT: addi a1, a1, -1
; RV32IFD-NEXT: bge a0, a1, .LBB36_3
-; RV32IFD-NEXT: .LBB36_1: # %entry
+; RV32IFD-NEXT: # %bb.1: # %entry
; RV32IFD-NEXT: lui a1, 1048568
; RV32IFD-NEXT: bge a1, a0, .LBB36_4
-; RV32IFD-NEXT: # %bb.2: # %entry
+; RV32IFD-NEXT: .LBB36_2: # %entry
; RV32IFD-NEXT: ret
; RV32IFD-NEXT: .LBB36_3: # %entry
; RV32IFD-NEXT: mv a0, a1
-; RV32IFD-NEXT: j .LBB36_1
+; RV32IFD-NEXT: lui a1, 1048568
+; RV32IFD-NEXT: blt a1, a0, .LBB36_2
; RV32IFD-NEXT: .LBB36_4: # %entry
; RV32IFD-NEXT: lui a0, 1048568
; RV32IFD-NEXT: ret
@@ -2568,14 +2577,15 @@ define i16 @stest_f64i16_mm(double %x) {
; RV64IFD-NEXT: lui a1, 8
; RV64IFD-NEXT: addiw a1, a1, -1
; RV64IFD-NEXT: bge a0, a1, .LBB36_3
-; RV64IFD-NEXT: .LBB36_1: # %entry
+; RV64IFD-NEXT: # %bb.1: # %entry
; RV64IFD-NEXT: lui a1, 1048568
; RV64IFD-NEXT: bge a1, a0, .LBB36_4
-; RV64IFD-NEXT: # %bb.2: # %entry
+; RV64IFD-NEXT: .LBB36_2: # %entry
; RV64IFD-NEXT: ret
; RV64IFD-NEXT: .LBB36_3: # %entry
; RV64IFD-NEXT: mv a0, a1
-; RV64IFD-NEXT: j .LBB36_1
+; RV64IFD-NEXT: lui a1, 1048568
+; RV64IFD-NEXT: blt a1, a0, .LBB36_2
; RV64IFD-NEXT: .LBB36_4: # %entry
; RV64IFD-NEXT: lui a0, 1048568
; RV64IFD-NEXT: ret
@@ -2741,14 +2751,15 @@ define i16 @stest_f32i16_mm(float %x) {
; RV32-NEXT: lui a1, 8
; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: bge a0, a1, .LBB39_3
-; RV32-NEXT: .LBB39_1: # %entry
+; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: lui a1, 1048568
; RV32-NEXT: bge a1, a0, .LBB39_4
-; RV32-NEXT: # %bb.2: # %entry
+; RV32-NEXT: .LBB39_2: # %entry
; RV32-NEXT: ret
; RV32-NEXT: .LBB39_3: # %entry
; RV32-NEXT: mv a0, a1
-; RV32-NEXT: j .LBB39_1
+; RV32-NEXT: lui a1, 1048568
+; RV32-NEXT: blt a1, a0, .LBB39_2
; RV32-NEXT: .LBB39_4: # %entry
; RV32-NEXT: lui a0, 1048568
; RV32-NEXT: ret
@@ -2759,14 +2770,15 @@ define i16 @stest_f32i16_mm(float %x) {
; RV64-NEXT: lui a1, 8
; RV64-NEXT: addiw a1, a1, -1
; RV64-NEXT: bge a0, a1, .LBB39_3
-; RV64-NEXT: .LBB39_1: # %entry
+; RV64-NEXT: # %bb.1: # %entry
; RV64-NEXT: lui a1, 1048568
; RV64-NEXT: bge a1, a0, .LBB39_4
-; RV64-NEXT: # %bb.2: # %entry
+; RV64-NEXT: .LBB39_2: # %entry
; RV64-NEXT: ret
; RV64-NEXT: .LBB39_3: # %entry
; RV64-NEXT: mv a0, a1
-; RV64-NEXT: j .LBB39_1
+; RV64-NEXT: lui a1, 1048568
+; RV64-NEXT: blt a1, a0, .LBB39_2
; RV64-NEXT: .LBB39_4: # %entry
; RV64-NEXT: lui a0, 1048568
; RV64-NEXT: ret
@@ -3024,42 +3036,43 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IF-NEXT: beq a1, a5, .LBB45_2
; RV32IF-NEXT: # %bb.1: # %entry
; RV32IF-NEXT: sltu a6, a1, a5
-; RV32IF-NEXT: j .LBB45_3
+; RV32IF-NEXT: or a7, a2, a4
+; RV32IF-NEXT: bnez a7, .LBB45_3
+; RV32IF-NEXT: j .LBB45_4
; RV32IF-NEXT: .LBB45_2:
; RV32IF-NEXT: sltiu a6, a3, -1
-; RV32IF-NEXT: .LBB45_3:
; RV32IF-NEXT: or a7, a2, a4
-; RV32IF-NEXT: beqz a7, .LBB45_5
-; RV32IF-NEXT: # %bb.4: # %entry
+; RV32IF-NEXT: beqz a7, .LBB45_4
+; RV32IF-NEXT: .LBB45_3: # %entry
; RV32IF-NEXT: slti a6, a4, 0
-; RV32IF-NEXT: .LBB45_5: # %entry
+; RV32IF-NEXT: .LBB45_4: # %entry
; RV32IF-NEXT: addi a7, a6, -1
; RV32IF-NEXT: neg t0, a6
-; RV32IF-NEXT: bnez a6, .LBB45_7
-; RV32IF-NEXT: # %bb.6: # %entry
+; RV32IF-NEXT: bnez a6, .LBB45_6
+; RV32IF-NEXT: # %bb.5: # %entry
; RV32IF-NEXT: mv a1, a5
-; RV32IF-NEXT: .LBB45_7: # %entry
+; RV32IF-NEXT: .LBB45_6: # %entry
; RV32IF-NEXT: or a3, a7, a3
; RV32IF-NEXT: and a4, t0, a4
; RV32IF-NEXT: and a2, t0, a2
-; RV32IF-NEXT: beq a1, a0, .LBB45_9
-; RV32IF-NEXT: # %bb.8: # %entry
+; RV32IF-NEXT: beq a1, a0, .LBB45_8
+; RV32IF-NEXT: # %bb.7: # %entry
; RV32IF-NEXT: sltu a0, a0, a1
-; RV32IF-NEXT: j .LBB45_10
-; RV32IF-NEXT: .LBB45_9:
+; RV32IF-NEXT: j .LBB45_9
+; RV32IF-NEXT: .LBB45_8:
; RV32IF-NEXT: snez a0, a3
-; RV32IF-NEXT: .LBB45_10: # %entry
+; RV32IF-NEXT: .LBB45_9: # %entry
; RV32IF-NEXT: and a2, a2, a4
; RV32IF-NEXT: li a5, -1
-; RV32IF-NEXT: beq a2, a5, .LBB45_12
-; RV32IF-NEXT: # %bb.11: # %entry
+; RV32IF-NEXT: beq a2, a5, .LBB45_11
+; RV32IF-NEXT: # %bb.10: # %entry
; RV32IF-NEXT: slti a0, a4, 0
; RV32IF-NEXT: xori a0, a0, 1
-; RV32IF-NEXT: .LBB45_12: # %entry
-; RV32IF-NEXT: bnez a0, .LBB45_14
-; RV32IF-NEXT: # %bb.13: # %entry
+; RV32IF-NEXT: .LBB45_11: # %entry
+; RV32IF-NEXT: bnez a0, .LBB45_13
+; RV32IF-NEXT: # %bb.12: # %entry
; RV32IF-NEXT: lui a1, 524288
-; RV32IF-NEXT: .LBB45_14: # %entry
+; RV32IF-NEXT: .LBB45_13: # %entry
; RV32IF-NEXT: neg a0, a0
; RV32IF-NEXT: and a0, a0, a3
; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -3126,42 +3139,43 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IFD-NEXT: beq a1, a5, .LBB45_2
; RV32IFD-NEXT: # %bb.1: # %entry
; RV32IFD-NEXT: sltu a6, a1, a5
-; RV32IFD-NEXT: j .LBB45_3
+; RV32IFD-NEXT: or a7, a2, a4
+; RV32IFD-NEXT: bnez a7, .LBB45_3
+; RV32IFD-NEXT: j .LBB45_4
; RV32IFD-NEXT: .LBB45_2:
; RV32IFD-NEXT: sltiu a6, a3, -1
-; RV32IFD-NEXT: .LBB45_3:
; RV32IFD-NEXT: or a7, a2, a4
-; RV32IFD-NEXT: beqz a7, .LBB45_5
-; RV32IFD-NEXT: # %bb.4: # %entry
+; RV32IFD-NEXT: beqz a7, .LBB45_4
+; RV32IFD-NEXT: .LBB45_3: # %entry
; RV32IFD-NEXT: slti a6, a4, 0
-; RV32IFD-NEXT: .LBB45_5: # %entry
+; RV32IFD-NEXT: .LBB45_4: # %entry
; RV32IFD-NEXT: addi a7, a6, -1
; RV32IFD-NEXT: neg t0, a6
-; RV32IFD-NEXT: bnez a6, .LBB45_7
-; RV32IFD-NEXT: # %bb.6: # %entry
+; RV32IFD-NEXT: bnez a6, .LBB45_6
+; RV32IFD-NEXT: # %bb.5: # %entry
; RV32IFD-NEXT: mv a1, a5
-; RV32IFD-NEXT: .LBB45_7: # %entry
+; RV32IFD-NEXT: .LBB45_6: # %entry
; RV32IFD-NEXT: or a3, a7, a3
; RV32IFD-NEXT: and a4, t0, a4
; RV32IFD-NEXT: and a2, t0, a2
-; RV32IFD-NEXT: beq a1, a0, .LBB45_9
-; RV32IFD-NEXT: # %bb.8: # %entry
+; RV32IFD-NEXT: beq a1, a0, .LBB45_8
+; RV32IFD-NEXT: # %bb.7: # %entry
; RV32IFD-NEXT: sltu a0, a0, a1
-; RV32IFD-NEXT: j .LBB45_10
-; RV32IFD-NEXT: .LBB45_9:
+; RV32IFD-NEXT: j .LBB45_9
+; RV32IFD-NEXT: .LBB45_8:
; RV32IFD-NEXT: snez a0, a3
-; RV32IFD-NEXT: .LBB45_10: # %entry
+; RV32IFD-NEXT: .LBB45_9: # %entry
; RV32IFD-NEXT: and a2, a2, a4
; RV32IFD-NEXT: li a5, -1
-; RV32IFD-NEXT: beq a2, a5, .LBB45_12
-; RV32IFD-NEXT: # %bb.11: # %entry
+; RV32IFD-NEXT: beq a2, a5, .LBB45_11
+; RV32IFD-NEXT: # %bb.10: # %entry
; RV32IFD-NEXT: slti a0, a4, 0
; RV32IFD-NEXT: xori a0, a0, 1
-; RV32IFD-NEXT: .LBB45_12: # %entry
-; RV32IFD-NEXT: bnez a0, .LBB45_14
-; RV32IFD-NEXT: # %bb.13: # %entry
+; RV32IFD-NEXT: .LBB45_11: # %entry
+; RV32IFD-NEXT: bnez a0, .LBB45_13
+; RV32IFD-NEXT: # %bb.12: # %entry
; RV32IFD-NEXT: lui a1, 524288
-; RV32IFD-NEXT: .LBB45_14: # %entry
+; RV32IFD-NEXT: .LBB45_13: # %entry
; RV32IFD-NEXT: neg a0, a0
; RV32IFD-NEXT: and a0, a0, a3
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -3395,42 +3409,43 @@ define i64 @stest_f32i64_mm(float %x) {
; RV32-NEXT: beq a1, a5, .LBB48_2
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: j .LBB48_3
+; RV32-NEXT: or a7, a2, a4
+; RV32-NEXT: bnez a7, .LBB48_3
+; RV32-NEXT: j .LBB48_4
; RV32-NEXT: .LBB48_2:
; RV32-NEXT: sltiu a6, a3, -1
-; RV32-NEXT: .LBB48_3:
; RV32-NEXT: or a7, a2, a4
-; RV32-NEXT: beqz a7, .LBB48_5
-; RV32-NEXT: # %bb.4: # %entry
+; RV32-NEXT: beqz a7, .LBB48_4
+; RV32-NEXT: .LBB48_3: # %entry
; RV32-NEXT: slti a6, a4, 0
-; RV32-NEXT: .LBB48_5: # %entry
+; RV32-NEXT: .LBB48_4: # %entry
; RV32-NEXT: addi a7, a6, -1
; RV32-NEXT: neg t0, a6
-; RV32-NEXT: bnez a6, .LBB48_7
-; RV32-NEXT: # %bb.6: # %entry
+; RV32-NEXT: bnez a6, .LBB48_6
+; RV32-NEXT: # %bb.5: # %entry
; RV32-NEXT: mv a1, a5
-; RV32-NEXT: .LBB48_7: # %entry
+; RV32-NEXT: .LBB48_6: # %entry
; RV32-NEXT: or a3, a7, a3
; RV32-NEXT: and a4, t0, a4
; RV32-NEXT: and a2, t0, a2
-; RV32-NEXT: beq a1, a0, .LBB48_9
-; RV32-NEXT: # %bb.8: # %entry
+; RV32-NEXT: beq a1, a0, .LBB48_8
+; RV32-NEXT: # %bb.7: # %entry
; RV32-NEXT: sltu a0, a0, a1
-; RV32-NEXT: j .LBB48_10
-; RV32-NEXT: .LBB48_9:
+; RV32-NEXT: j .LBB48_9
+; RV32-NEXT: .LBB48_8:
; RV32-NEXT: snez a0, a3
-; RV32-NEXT: .LBB48_10: # %entry
+; RV32-NEXT: .LBB48_9: # %entry
; RV32-NEXT: and a2, a2, a4
; RV32-NEXT: li a5, -1
-; RV32-NEXT: beq a2, a5, .LBB48_12
-; RV32-NEXT: # %bb.11: # %entry
+; RV32-NEXT: beq a2, a5, .LBB48_11
+; RV32-NEXT: # %bb.10: # %entry
; RV32-NEXT: slti a0, a4, 0
; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: .LBB48_12: # %entry
-; RV32-NEXT: bnez a0, .LBB48_14
-; RV32-NEXT: # %bb.13: # %entry
+; RV32-NEXT: .LBB48_11: # %entry
+; RV32-NEXT: bnez a0, .LBB48_13
+; RV32-NEXT: # %bb.12: # %entry
; RV32-NEXT: lui a1, 524288
-; RV32-NEXT: .LBB48_14: # %entry
+; RV32-NEXT: .LBB48_13: # %entry
; RV32-NEXT: neg a0, a0
; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -3595,42 +3610,43 @@ define i64 @stest_f16i64_mm(half %x) {
; RV32-NEXT: beq a1, a5, .LBB51_2
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: j .LBB51_3
+; RV32-NEXT: or a7, a2, a4
+; RV32-NEXT: bnez a7, .LBB51_3
+; RV32-NEXT: j .LBB51_4
; RV32-NEXT: .LBB51_2:
; RV32-NEXT: sltiu a6, a3, -1
-; RV32-NEXT: .LBB51_3:
; RV32-NEXT: or a7, a2, a4
-; RV32-NEXT: beqz a7, .LBB51_5
-; RV32-NEXT: # %bb.4: # %entry
+; RV32-NEXT: beqz a7, .LBB51_4
+; RV32-NEXT: .LBB51_3: # %entry
; RV32-NEXT: slti a6, a4, 0
-; RV32-NEXT: .LBB51_5: # %entry
+; RV32-NEXT: .LBB51_4: # %entry
; RV32-NEXT: addi a7, a6, -1
; RV32-NEXT: neg t0, a6
-; RV32-NEXT: bnez a6, .LBB51_7
-; RV32-NEXT: # %bb.6: # %entry
+; RV32-NEXT: bnez a6, .LBB51_6
+; RV32-NEXT: # %bb.5: # %entry
; RV32-NEXT: mv a1, a5
-; RV32-NEXT: .LBB51_7: # %entry
+; RV32-NEXT: .LBB51_6: # %entry
; RV32-NEXT: or a3, a7, a3
; RV32-NEXT: and a4, t0, a4
; RV32-NEXT: and a2, t0, a2
-; RV32-NEXT: beq a1, a0, .LBB51_9
-; RV32-NEXT: # %bb.8: # %entry
+; RV32-NEXT: beq a1, a0, .LBB51_8
+; RV32-NEXT: # %bb.7: # %entry
; RV32-NEXT: sltu a0, a0, a1
-; RV32-NEXT: j .LBB51_10
-; RV32-NEXT: .LBB51_9:
+; RV32-NEXT: j .LBB51_9
+; RV32-NEXT: .LBB51_8:
; RV32-NEXT: snez a0, a3
-; RV32-NEXT: .LBB51_10: # %entry
+; RV32-NEXT: .LBB51_9: # %entry
; RV32-NEXT: and a2, a2, a4
; RV32-NEXT: li a5, -1
-; RV32-NEXT: beq a2, a5, .LBB51_12
-; RV32-NEXT: # %bb.11: # %entry
+; RV32-NEXT: beq a2, a5, .LBB51_11
+; RV32-NEXT: # %bb.10: # %entry
; RV32-NEXT: slti a0, a4, 0
; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: .LBB51_12: # %entry
-; RV32-NEXT: bnez a0, .LBB51_14
-; RV32-NEXT: # %bb.13: # %entry
+; RV32-NEXT: .LBB51_11: # %entry
+; RV32-NEXT: bnez a0, .LBB51_13
+; RV32-NEXT: # %bb.12: # %entry
; RV32-NEXT: lui a1, 524288
-; RV32-NEXT: .LBB51_14: # %entry
+; RV32-NEXT: .LBB51_13: # %entry
; RV32-NEXT: neg a0, a0
; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/half-maximum-minimum.ll b/llvm/test/CodeGen/RISCV/half-maximum-minimum.ll
index 104fa22027042..bc3f44363fb95 100644
--- a/llvm/test/CodeGen/RISCV/half-maximum-minimum.ll
+++ b/llvm/test/CodeGen/RISCV/half-maximum-minimum.ll
@@ -20,15 +20,16 @@ define half @fminimum_f16(half %a, half %b) nounwind {
; CHECKIZFH-NEXT: feq.h a0, fa0, fa0
; CHECKIZFH-NEXT: fmv.h fa5, fa1
; CHECKIZFH-NEXT: beqz a0, .LBB0_3
-; CHECKIZFH-NEXT: .LBB0_1:
+; CHECKIZFH-NEXT: # %bb.1:
; CHECKIZFH-NEXT: feq.h a0, fa1, fa1
; CHECKIZFH-NEXT: beqz a0, .LBB0_4
-; CHECKIZFH-NEXT: # %bb.2:
+; CHECKIZFH-NEXT: .LBB0_2:
; CHECKIZFH-NEXT: fmin.h fa0, fa0, fa5
; CHECKIZFH-NEXT: ret
; CHECKIZFH-NEXT: .LBB0_3:
; CHECKIZFH-NEXT: fmv.h fa5, fa0
-; CHECKIZFH-NEXT: j .LBB0_1
+; CHECKIZFH-NEXT: feq.h a0, fa1, fa1
+; CHECKIZFH-NEXT: bnez a0, .LBB0_2
; CHECKIZFH-NEXT: .LBB0_4:
; CHECKIZFH-NEXT: fmin.h fa0, fa1, fa5
; CHECKIZFH-NEXT: ret
@@ -38,15 +39,16 @@ define half @fminimum_f16(half %a, half %b) nounwind {
; CHECKIZHINX-NEXT: feq.h a3, a0, a0
; CHECKIZHINX-NEXT: mv a2, a1
; CHECKIZHINX-NEXT: beqz a3, .LBB0_3
-; CHECKIZHINX-NEXT: .LBB0_1:
+; CHECKIZHINX-NEXT: # %bb.1:
; CHECKIZHINX-NEXT: feq.h a3, a1, a1
; CHECKIZHINX-NEXT: beqz a3, .LBB0_4
-; CHECKIZHINX-NEXT: # %bb.2:
+; CHECKIZHINX-NEXT: .LBB0_2:
; CHECKIZHINX-NEXT: fmin.h a0, a0, a2
; CHECKIZHINX-NEXT: ret
; CHECKIZHINX-NEXT: .LBB0_3:
; CHECKIZHINX-NEXT: mv a2, a0
-; CHECKIZHINX-NEXT: j .LBB0_1
+; CHECKIZHINX-NEXT: feq.h a3, a1, a1
+; CHECKIZHINX-NEXT: bnez a3, .LBB0_2
; CHECKIZHINX-NEXT: .LBB0_4:
; CHECKIZHINX-NEXT: fmin.h a0, a1, a2
; CHECKIZHINX-NEXT: ret
@@ -62,15 +64,16 @@ define half @fmaximum_f16(half %a, half %b) nounwind {
; CHECKIZFH-NEXT: feq.h a0, fa0, fa0
; CHECKIZFH-NEXT: fmv.h fa5, fa1
; CHECKIZFH-NEXT: beqz a0, .LBB1_3
-; CHECKIZFH-NEXT: .LBB1_1:
+; CHECKIZFH-NEXT: # %bb.1:
; CHECKIZFH-NEXT: feq.h a0, fa1, fa1
; CHECKIZFH-NEXT: beqz a0, .LBB1_4
-; CHECKIZFH-NEXT: # %bb.2:
+; CHECKIZFH-NEXT: .LBB1_2:
; CHECKIZFH-NEXT: fmax.h fa0, fa0, fa5
; CHECKIZFH-NEXT: ret
; CHECKIZFH-NEXT: .LBB1_3:
; CHECKIZFH-NEXT: fmv.h fa5, fa0
-; CHECKIZFH-NEXT: j .LBB1_1
+; CHECKIZFH-NEXT: feq.h a0, fa1, fa1
+; CHECKIZFH-NEXT: bnez a0, .LBB1_2
; CHECKIZFH-NEXT: .LBB1_4:
; CHECKIZFH-NEXT: fmax.h fa0, fa1, fa5
; CHECKIZFH-NEXT: ret
@@ -80,15 +83,16 @@ define half @fmaximum_f16(half %a, half %b) nounwind {
; CHECKIZHINX-NEXT: feq.h a3, a0, a0
; CHECKIZHINX-NEXT: mv a2, a1
; CHECKIZHINX-NEXT: beqz a3, .LBB1_3
-; CHECKIZHINX-NEXT: .LBB1_1:
+; CHECKIZHINX-NEXT: # %bb.1:
; CHECKIZHINX-NEXT: feq.h a3, a1, a1
; CHECKIZHINX-NEXT: beqz a3, .LBB1_4
-; CHECKIZHINX-NEXT: # %bb.2:
+; CHECKIZHINX-NEXT: .LBB1_2:
; CHECKIZHINX-NEXT: fmax.h a0, a0, a2
; CHECKIZHINX-NEXT: ret
; CHECKIZHINX-NEXT: .LBB1_3:
; CHECKIZHINX-NEXT: mv a2, a0
-; CHECKIZHINX-NEXT: j .LBB1_1
+; CHECKIZHINX-NEXT: feq.h a3, a1, a1
+; CHECKIZHINX-NEXT: bnez a3, .LBB1_2
; CHECKIZHINX-NEXT: .LBB1_4:
; CHECKIZHINX-NEXT: fmax.h a0, a1, a2
; CHECKIZHINX-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/machine-pipeliner.ll b/llvm/test/CodeGen/RISCV/machine-pipeliner.ll
index cc8a7270b289d..d250098576687 100644
--- a/llvm/test/CodeGen/RISCV/machine-pipeliner.ll
+++ b/llvm/test/CodeGen/RISCV/machine-pipeliner.ll
@@ -52,7 +52,7 @@ define void @test_pipelined_1(ptr noalias %in, ptr noalias %out, i32 signext %cn
;
; CHECK-PIPELINED-LABEL: test_pipelined_1:
; CHECK-PIPELINED: # %bb.0: # %entry
-; CHECK-PIPELINED-NEXT: blez a2, .LBB1_7
+; CHECK-PIPELINED-NEXT: blez a2, .LBB1_6
; CHECK-PIPELINED-NEXT: # %bb.1: # %for.body.preheader
; CHECK-PIPELINED-NEXT: lw a4, 0(a1)
; CHECK-PIPELINED-NEXT: addi a2, a2, -1
@@ -60,32 +60,32 @@ define void @test_pipelined_1(ptr noalias %in, ptr noalias %out, i32 signext %cn
; CHECK-PIPELINED-NEXT: addi a2, a0, 4
; CHECK-PIPELINED-NEXT: addi a1, a1, 4
; CHECK-PIPELINED-NEXT: addi a6, a6, 4
-; CHECK-PIPELINED-NEXT: beq a1, a6, .LBB1_6
+; CHECK-PIPELINED-NEXT: beq a1, a6, .LBB1_5
; CHECK-PIPELINED-NEXT: # %bb.2: # %for.body
; CHECK-PIPELINED-NEXT: lw a5, 0(a1)
; CHECK-PIPELINED-NEXT: addi a3, a2, 4
-; CHECK-PIPELINED-NEXT: j .LBB1_4
+; CHECK-PIPELINED-NEXT: addi a4, a4, 1
+; CHECK-PIPELINED-NEXT: addi a1, a1, 4
+; CHECK-PIPELINED-NEXT: beq a1, a6, .LBB1_4
; CHECK-PIPELINED-NEXT: .LBB1_3: # %for.body
-; CHECK-PIPELINED-NEXT: # in Loop: Header=BB1_4 Depth=1
+; CHECK-PIPELINED-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-PIPELINED-NEXT: sw a4, 0(a0)
; CHECK-PIPELINED-NEXT: mv a4, a5
; CHECK-PIPELINED-NEXT: lw a5, 0(a1)
; CHECK-PIPELINED-NEXT: mv a0, a2
; CHECK-PIPELINED-NEXT: mv a2, a3
; CHECK-PIPELINED-NEXT: addi a3, a3, 4
-; CHECK-PIPELINED-NEXT: .LBB1_4: # %for.body
-; CHECK-PIPELINED-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-PIPELINED-NEXT: addi a4, a4, 1
; CHECK-PIPELINED-NEXT: addi a1, a1, 4
; CHECK-PIPELINED-NEXT: bne a1, a6, .LBB1_3
-; CHECK-PIPELINED-NEXT: # %bb.5:
+; CHECK-PIPELINED-NEXT: .LBB1_4:
; CHECK-PIPELINED-NEXT: sw a4, 0(a0)
; CHECK-PIPELINED-NEXT: mv a0, a2
; CHECK-PIPELINED-NEXT: mv a4, a5
-; CHECK-PIPELINED-NEXT: .LBB1_6:
+; CHECK-PIPELINED-NEXT: .LBB1_5:
; CHECK-PIPELINED-NEXT: addi a4, a4, 1
; CHECK-PIPELINED-NEXT: sw a4, 0(a0)
-; CHECK-PIPELINED-NEXT: .LBB1_7: # %for.end
+; CHECK-PIPELINED-NEXT: .LBB1_6: # %for.end
; CHECK-PIPELINED-NEXT: ret
entry:
%cmp = icmp sgt i32 %cnt, 0
diff --git a/llvm/test/CodeGen/RISCV/reduce-unnecessary-extension.ll b/llvm/test/CodeGen/RISCV/reduce-unnecessary-extension.ll
index 94f35a8cf660b..351408a7f085c 100644
--- a/llvm/test/CodeGen/RISCV/reduce-unnecessary-extension.ll
+++ b/llvm/test/CodeGen/RISCV/reduce-unnecessary-extension.ll
@@ -65,25 +65,25 @@ define signext i32 @test_loop() nounwind {
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: li s1, -16
; RV64I-NEXT: lui s2, %hi(PL_reg_match_utf8)
-; RV64I-NEXT: j .LBB1_3
-; RV64I-NEXT: .LBB1_1: # in Loop: Header=BB1_3 Depth=1
+; RV64I-NEXT: j .LBB1_2
+; RV64I-NEXT: .LBB1_1: # in Loop: Header=BB1_2 Depth=1
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call test2
-; RV64I-NEXT: .LBB1_2: # in Loop: Header=BB1_3 Depth=1
; RV64I-NEXT: addiw s1, s1, 1
-; RV64I-NEXT: beqz s1, .LBB1_5
-; RV64I-NEXT: .LBB1_3: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: beqz s1, .LBB1_4
+; RV64I-NEXT: .LBB1_2: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: lb s0, %lo(PL_reg_match_utf8)(s2)
; RV64I-NEXT: beqz s0, .LBB1_1
-; RV64I-NEXT: # %bb.4: # in Loop: Header=BB1_3 Depth=1
+; RV64I-NEXT: # %bb.3: # in Loop: Header=BB1_2 Depth=1
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call test1
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call test2
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call test3
-; RV64I-NEXT: j .LBB1_2
-; RV64I-NEXT: .LBB1_5:
+; RV64I-NEXT: addiw s1, s1, 1
+; RV64I-NEXT: bnez s1, .LBB1_2
+; RV64I-NEXT: .LBB1_4:
; RV64I-NEXT: li a0, 0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
index 851b37f2887b4..0508016736004 100644
--- a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
@@ -36,22 +36,25 @@ define i32 @test(i32 %n) {
; CHECK-O3-LABEL: test:
; CHECK-O3: # %bb.0: # %entry
; CHECK-O3-NEXT: sext.w a1, a0
-; CHECK-O3-NEXT: blez a1, .LBB0_3
+; CHECK-O3-NEXT: blez a1, .LBB0_2
; CHECK-O3-NEXT: # %bb.1: # %if.then
; CHECK-O3-NEXT: lui a1, %hi(a)
; CHECK-O3-NEXT: lw a1, %lo(a)(a1)
; CHECK-O3-NEXT: mul a0, a1, a0
-; CHECK-O3-NEXT: .LBB0_2: # %if.then
; CHECK-O3-NEXT: lui a1, %hi(c)
; CHECK-O3-NEXT: lw a1, %lo(c)(a1)
; CHECK-O3-NEXT: addi a0, a0, -1
; CHECK-O3-NEXT: mulw a0, a0, a1
; CHECK-O3-NEXT: tail foo
-; CHECK-O3-NEXT: .LBB0_3: # %if.else
+; CHECK-O3-NEXT: .LBB0_2: # %if.else
; CHECK-O3-NEXT: lui a1, %hi(b)
; CHECK-O3-NEXT: lw a1, %lo(b)(a1)
; CHECK-O3-NEXT: divw a0, a1, a0
-; CHECK-O3-NEXT: j .LBB0_2
+; CHECK-O3-NEXT: lui a1, %hi(c)
+; CHECK-O3-NEXT: lw a1, %lo(c)(a1)
+; CHECK-O3-NEXT: addi a0, a0, -1
+; CHECK-O3-NEXT: mulw a0, a0, a1
+; CHECK-O3-NEXT: tail foo
entry:
%cmp = icmp sgt i32 %n, 0
br i1 %cmp, label %if.then, label %if.else
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 09332f44626e7..90a8eadb3f974 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -876,26 +876,26 @@ define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a3, 4(a1)
; RV32I-NEXT: lw a2, 12(a1)
-; RV32I-NEXT: beqz a3, .LBB22_4
+; RV32I-NEXT: beqz a3, .LBB22_3
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: sub a0, a3, a0
; RV32I-NEXT: xor a3, a3, a0
; RV32I-NEXT: sltu a0, a0, a3
-; RV32I-NEXT: .LBB22_2:
; RV32I-NEXT: lw a1, 8(a1)
-; RV32I-NEXT: bnez a2, .LBB22_5
-; RV32I-NEXT: # %bb.3:
+; RV32I-NEXT: bnez a2, .LBB22_4
+; RV32I-NEXT: .LBB22_2:
; RV32I-NEXT: addi a2, a1, -1
; RV32I-NEXT: xor a1, a1, a2
; RV32I-NEXT: sltu a1, a2, a1
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB22_4:
+; RV32I-NEXT: .LBB22_3:
; RV32I-NEXT: addi a3, a0, -1
; RV32I-NEXT: xor a0, a0, a3
; RV32I-NEXT: sltu a0, a3, a0
-; RV32I-NEXT: j .LBB22_2
-; RV32I-NEXT: .LBB22_5:
+; RV32I-NEXT: lw a1, 8(a1)
+; RV32I-NEXT: beqz a2, .LBB22_2
+; RV32I-NEXT: .LBB22_4:
; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: sub a1, a2, a1
; RV32I-NEXT: xor a2, a2, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir
index 4e36fbfd23cfc..be73d4808937a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir
@@ -8,15 +8,16 @@
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: sf.vc.v.x 3, 31, v9, a1
; CHECK-NEXT: bgeu a0, zero, .LBB0_3
- ; CHECK-NEXT: .LBB0_1: # %entry
+ ; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: bltu a0, a2, .LBB0_4
- ; CHECK-NEXT: # %bb.2: # %entry
+ ; CHECK-NEXT: .LBB0_2: # %entry
; CHECK-NEXT: vse64.v v9, (a1)
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_3:
; CHECK-NEXT: vmv.v.i v9, 0
- ; CHECK-NEXT: j .LBB0_1
+ ; CHECK-NEXT: li a2, 128
+ ; CHECK-NEXT: bgeu a0, a2, .LBB0_2
; CHECK-NEXT: .LBB0_4: # %entry
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: vsll.vi v8, v8, 5
diff --git a/llvm/test/CodeGen/RISCV/rvv/expandload.ll b/llvm/test/CodeGen/RISCV/rvv/expandload.ll
index 54ecf7f7e4650..9173fa4622487 100644
--- a/llvm/test/CodeGen/RISCV/rvv/expandload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/expandload.ll
@@ -1641,7 +1641,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_28: # %else106
; CHECK-RV32-NEXT: slli a1, a3, 3
; CHECK-RV32-NEXT: bgez a1, .LBB61_30
-; CHECK-RV32-NEXT: # %bb.29: # %cond.load109
+; CHECK-RV32-NEXT: .LBB61_29: # %cond.load109
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 29, e8, m1, tu, ma
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -1801,7 +1801,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_64: # %else238
; CHECK-RV32-NEXT: slli a3, a2, 2
; CHECK-RV32-NEXT: bgez a3, .LBB61_66
-; CHECK-RV32-NEXT: # %bb.65: # %cond.load241
+; CHECK-RV32-NEXT: .LBB61_65: # %cond.load241
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -1953,7 +1953,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_98: # %else366
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_100
-; CHECK-RV32-NEXT: # %bb.99: # %cond.load369
+; CHECK-RV32-NEXT: .LBB61_99: # %cond.load369
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -2105,7 +2105,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_132: # %else494
; CHECK-RV32-NEXT: slli a3, a2, 2
; CHECK-RV32-NEXT: bgez a3, .LBB61_134
-; CHECK-RV32-NEXT: # %bb.133: # %cond.load497
+; CHECK-RV32-NEXT: .LBB61_133: # %cond.load497
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -2257,7 +2257,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_166: # %else622
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_168
-; CHECK-RV32-NEXT: # %bb.167: # %cond.load625
+; CHECK-RV32-NEXT: .LBB61_167: # %cond.load625
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -2409,7 +2409,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_200: # %else750
; CHECK-RV32-NEXT: slli a3, a2, 2
; CHECK-RV32-NEXT: bgez a3, .LBB61_202
-; CHECK-RV32-NEXT: # %bb.201: # %cond.load753
+; CHECK-RV32-NEXT: .LBB61_201: # %cond.load753
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -2561,7 +2561,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_234: # %else878
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_236
-; CHECK-RV32-NEXT: # %bb.235: # %cond.load881
+; CHECK-RV32-NEXT: .LBB61_235: # %cond.load881
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -2713,7 +2713,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_268: # %else1006
; CHECK-RV32-NEXT: slli a3, a2, 2
; CHECK-RV32-NEXT: bgez a3, .LBB61_270
-; CHECK-RV32-NEXT: # %bb.269: # %cond.load1009
+; CHECK-RV32-NEXT: .LBB61_269: # %cond.load1009
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -2865,7 +2865,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_302: # %else1134
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_304
-; CHECK-RV32-NEXT: # %bb.303: # %cond.load1137
+; CHECK-RV32-NEXT: .LBB61_303: # %cond.load1137
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
@@ -3012,7 +3012,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_336: # %else1262
; CHECK-RV32-NEXT: slli a3, a2, 2
; CHECK-RV32-NEXT: bgez a3, .LBB61_338
-; CHECK-RV32-NEXT: # %bb.337: # %cond.load1265
+; CHECK-RV32-NEXT: .LBB61_337: # %cond.load1265
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
@@ -3159,7 +3159,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_370: # %else1390
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_372
-; CHECK-RV32-NEXT: # %bb.371: # %cond.load1393
+; CHECK-RV32-NEXT: .LBB61_371: # %cond.load1393
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
@@ -3306,7 +3306,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_404: # %else1518
; CHECK-RV32-NEXT: slli a3, a2, 2
; CHECK-RV32-NEXT: bgez a3, .LBB61_406
-; CHECK-RV32-NEXT: # %bb.405: # %cond.load1521
+; CHECK-RV32-NEXT: .LBB61_405: # %cond.load1521
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
@@ -3453,7 +3453,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_438: # %else1646
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_440
-; CHECK-RV32-NEXT: # %bb.439: # %cond.load1649
+; CHECK-RV32-NEXT: .LBB61_439: # %cond.load1649
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
@@ -3600,7 +3600,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_472: # %else1774
; CHECK-RV32-NEXT: slli a3, a2, 2
; CHECK-RV32-NEXT: bgez a3, .LBB61_474
-; CHECK-RV32-NEXT: # %bb.473: # %cond.load1777
+; CHECK-RV32-NEXT: .LBB61_473: # %cond.load1777
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
@@ -3747,7 +3747,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: .LBB61_506: # %else1902
; CHECK-RV32-NEXT: slli a2, a3, 2
; CHECK-RV32-NEXT: bgez a2, .LBB61_508
-; CHECK-RV32-NEXT: # %bb.507: # %cond.load1905
+; CHECK-RV32-NEXT: .LBB61_507: # %cond.load1905
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
@@ -3912,7 +3912,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_1
+; CHECK-RV32-NEXT: andi a1, a3, 2
+; CHECK-RV32-NEXT: bnez a1, .LBB61_545
+; CHECK-RV32-NEXT: j .LBB61_2
; CHECK-RV32-NEXT: .LBB61_545: # %cond.load1
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 2, e8, m1, tu, ma
@@ -3922,7 +3924,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_2
+; CHECK-RV32-NEXT: andi a1, a3, 4
+; CHECK-RV32-NEXT: bnez a1, .LBB61_546
+; CHECK-RV32-NEXT: j .LBB61_3
; CHECK-RV32-NEXT: .LBB61_546: # %cond.load5
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 3, e8, m1, tu, ma
@@ -3932,7 +3936,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_3
+; CHECK-RV32-NEXT: andi a1, a3, 8
+; CHECK-RV32-NEXT: bnez a1, .LBB61_547
+; CHECK-RV32-NEXT: j .LBB61_4
; CHECK-RV32-NEXT: .LBB61_547: # %cond.load9
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 4, e8, m1, tu, ma
@@ -3942,7 +3948,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_4
+; CHECK-RV32-NEXT: andi a1, a3, 16
+; CHECK-RV32-NEXT: bnez a1, .LBB61_548
+; CHECK-RV32-NEXT: j .LBB61_5
; CHECK-RV32-NEXT: .LBB61_548: # %cond.load13
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 5, e8, m1, tu, ma
@@ -3952,7 +3960,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_5
+; CHECK-RV32-NEXT: andi a1, a3, 32
+; CHECK-RV32-NEXT: bnez a1, .LBB61_549
+; CHECK-RV32-NEXT: j .LBB61_6
; CHECK-RV32-NEXT: .LBB61_549: # %cond.load17
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 6, e8, m1, tu, ma
@@ -3962,7 +3972,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_6
+; CHECK-RV32-NEXT: andi a1, a3, 64
+; CHECK-RV32-NEXT: bnez a1, .LBB61_550
+; CHECK-RV32-NEXT: j .LBB61_7
; CHECK-RV32-NEXT: .LBB61_550: # %cond.load21
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 7, e8, m1, tu, ma
@@ -3972,7 +3984,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_7
+; CHECK-RV32-NEXT: andi a1, a3, 128
+; CHECK-RV32-NEXT: bnez a1, .LBB61_551
+; CHECK-RV32-NEXT: j .LBB61_8
; CHECK-RV32-NEXT: .LBB61_551: # %cond.load25
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 8, e8, m1, tu, ma
@@ -3982,7 +3996,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_8
+; CHECK-RV32-NEXT: andi a1, a3, 256
+; CHECK-RV32-NEXT: bnez a1, .LBB61_552
+; CHECK-RV32-NEXT: j .LBB61_9
; CHECK-RV32-NEXT: .LBB61_552: # %cond.load29
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 9, e8, m1, tu, ma
@@ -3992,7 +4008,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_9
+; CHECK-RV32-NEXT: andi a1, a3, 512
+; CHECK-RV32-NEXT: bnez a1, .LBB61_553
+; CHECK-RV32-NEXT: j .LBB61_10
; CHECK-RV32-NEXT: .LBB61_553: # %cond.load33
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 10, e8, m1, tu, ma
@@ -4002,7 +4020,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_10
+; CHECK-RV32-NEXT: andi a1, a3, 1024
+; CHECK-RV32-NEXT: bnez a1, .LBB61_554
+; CHECK-RV32-NEXT: j .LBB61_11
; CHECK-RV32-NEXT: .LBB61_554: # %cond.load37
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 11, e8, m1, tu, ma
@@ -4012,7 +4032,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_11
+; CHECK-RV32-NEXT: slli a1, a3, 20
+; CHECK-RV32-NEXT: bltz a1, .LBB61_555
+; CHECK-RV32-NEXT: j .LBB61_12
; CHECK-RV32-NEXT: .LBB61_555: # %cond.load41
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 12, e8, m1, tu, ma
@@ -4022,7 +4044,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_12
+; CHECK-RV32-NEXT: slli a1, a3, 19
+; CHECK-RV32-NEXT: bltz a1, .LBB61_556
+; CHECK-RV32-NEXT: j .LBB61_13
; CHECK-RV32-NEXT: .LBB61_556: # %cond.load45
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 13, e8, m1, tu, ma
@@ -4032,7 +4056,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_13
+; CHECK-RV32-NEXT: slli a1, a3, 18
+; CHECK-RV32-NEXT: bltz a1, .LBB61_557
+; CHECK-RV32-NEXT: j .LBB61_14
; CHECK-RV32-NEXT: .LBB61_557: # %cond.load49
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 14, e8, m1, tu, ma
@@ -4042,7 +4068,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_14
+; CHECK-RV32-NEXT: slli a1, a3, 17
+; CHECK-RV32-NEXT: bltz a1, .LBB61_558
+; CHECK-RV32-NEXT: j .LBB61_15
; CHECK-RV32-NEXT: .LBB61_558: # %cond.load53
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 15, e8, m1, tu, ma
@@ -4052,7 +4080,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_15
+; CHECK-RV32-NEXT: slli a1, a3, 16
+; CHECK-RV32-NEXT: bltz a1, .LBB61_559
+; CHECK-RV32-NEXT: j .LBB61_16
; CHECK-RV32-NEXT: .LBB61_559: # %cond.load57
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 16, e8, m1, tu, ma
@@ -4062,7 +4092,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_16
+; CHECK-RV32-NEXT: slli a1, a3, 15
+; CHECK-RV32-NEXT: bltz a1, .LBB61_560
+; CHECK-RV32-NEXT: j .LBB61_17
; CHECK-RV32-NEXT: .LBB61_560: # %cond.load61
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 17, e8, m1, tu, ma
@@ -4072,7 +4104,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_17
+; CHECK-RV32-NEXT: slli a1, a3, 14
+; CHECK-RV32-NEXT: bltz a1, .LBB61_561
+; CHECK-RV32-NEXT: j .LBB61_18
; CHECK-RV32-NEXT: .LBB61_561: # %cond.load65
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 18, e8, m1, tu, ma
@@ -4082,7 +4116,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_18
+; CHECK-RV32-NEXT: slli a1, a3, 13
+; CHECK-RV32-NEXT: bltz a1, .LBB61_562
+; CHECK-RV32-NEXT: j .LBB61_19
; CHECK-RV32-NEXT: .LBB61_562: # %cond.load69
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 19, e8, m1, tu, ma
@@ -4092,7 +4128,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_19
+; CHECK-RV32-NEXT: slli a1, a3, 12
+; CHECK-RV32-NEXT: bltz a1, .LBB61_563
+; CHECK-RV32-NEXT: j .LBB61_20
; CHECK-RV32-NEXT: .LBB61_563: # %cond.load73
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 20, e8, m1, tu, ma
@@ -4102,7 +4140,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_20
+; CHECK-RV32-NEXT: slli a1, a3, 11
+; CHECK-RV32-NEXT: bltz a1, .LBB61_564
+; CHECK-RV32-NEXT: j .LBB61_21
; CHECK-RV32-NEXT: .LBB61_564: # %cond.load77
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 21, e8, m1, tu, ma
@@ -4112,7 +4152,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_21
+; CHECK-RV32-NEXT: slli a1, a3, 10
+; CHECK-RV32-NEXT: bltz a1, .LBB61_565
+; CHECK-RV32-NEXT: j .LBB61_22
; CHECK-RV32-NEXT: .LBB61_565: # %cond.load81
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 22, e8, m1, tu, ma
@@ -4122,7 +4164,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_22
+; CHECK-RV32-NEXT: slli a1, a3, 9
+; CHECK-RV32-NEXT: bltz a1, .LBB61_566
+; CHECK-RV32-NEXT: j .LBB61_23
; CHECK-RV32-NEXT: .LBB61_566: # %cond.load85
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 23, e8, m1, tu, ma
@@ -4132,7 +4176,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_23
+; CHECK-RV32-NEXT: slli a1, a3, 8
+; CHECK-RV32-NEXT: bltz a1, .LBB61_567
+; CHECK-RV32-NEXT: j .LBB61_24
; CHECK-RV32-NEXT: .LBB61_567: # %cond.load89
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 24, e8, m1, tu, ma
@@ -4142,7 +4188,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_24
+; CHECK-RV32-NEXT: slli a1, a3, 7
+; CHECK-RV32-NEXT: bltz a1, .LBB61_568
+; CHECK-RV32-NEXT: j .LBB61_25
; CHECK-RV32-NEXT: .LBB61_568: # %cond.load93
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 25, e8, m1, tu, ma
@@ -4152,7 +4200,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_25
+; CHECK-RV32-NEXT: slli a1, a3, 6
+; CHECK-RV32-NEXT: bltz a1, .LBB61_569
+; CHECK-RV32-NEXT: j .LBB61_26
; CHECK-RV32-NEXT: .LBB61_569: # %cond.load97
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 26, e8, m1, tu, ma
@@ -4162,7 +4212,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_26
+; CHECK-RV32-NEXT: slli a1, a3, 5
+; CHECK-RV32-NEXT: bltz a1, .LBB61_570
+; CHECK-RV32-NEXT: j .LBB61_27
; CHECK-RV32-NEXT: .LBB61_570: # %cond.load101
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 27, e8, m1, tu, ma
@@ -4172,7 +4224,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_27
+; CHECK-RV32-NEXT: slli a1, a3, 4
+; CHECK-RV32-NEXT: bltz a1, .LBB61_571
+; CHECK-RV32-NEXT: j .LBB61_28
; CHECK-RV32-NEXT: .LBB61_571: # %cond.load105
; CHECK-RV32-NEXT: lbu a1, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 28, e8, m1, tu, ma
@@ -4182,7 +4236,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_28
+; CHECK-RV32-NEXT: slli a1, a3, 3
+; CHECK-RV32-NEXT: bgez a1, .LBB61_1025
+; CHECK-RV32-NEXT: j .LBB61_29
+; CHECK-RV32-NEXT: .LBB61_1025: # %cond.load105
+; CHECK-RV32-NEXT: j .LBB61_30
; CHECK-RV32-NEXT: .LBB61_572: # %cond.load121
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -4193,7 +4251,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_35
+; CHECK-RV32-NEXT: andi a3, a2, 1
+; CHECK-RV32-NEXT: bnez a3, .LBB61_573
+; CHECK-RV32-NEXT: j .LBB61_36
; CHECK-RV32-NEXT: .LBB61_573: # %cond.load125
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4206,7 +4266,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_36
+; CHECK-RV32-NEXT: andi a3, a2, 2
+; CHECK-RV32-NEXT: bnez a3, .LBB61_574
+; CHECK-RV32-NEXT: j .LBB61_37
; CHECK-RV32-NEXT: .LBB61_574: # %cond.load129
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4219,7 +4281,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_37
+; CHECK-RV32-NEXT: andi a3, a2, 4
+; CHECK-RV32-NEXT: bnez a3, .LBB61_575
+; CHECK-RV32-NEXT: j .LBB61_38
; CHECK-RV32-NEXT: .LBB61_575: # %cond.load133
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4232,7 +4296,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_38
+; CHECK-RV32-NEXT: andi a3, a2, 8
+; CHECK-RV32-NEXT: bnez a3, .LBB61_576
+; CHECK-RV32-NEXT: j .LBB61_39
; CHECK-RV32-NEXT: .LBB61_576: # %cond.load137
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4245,7 +4311,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_39
+; CHECK-RV32-NEXT: andi a3, a2, 16
+; CHECK-RV32-NEXT: bnez a3, .LBB61_577
+; CHECK-RV32-NEXT: j .LBB61_40
; CHECK-RV32-NEXT: .LBB61_577: # %cond.load141
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4258,7 +4326,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_40
+; CHECK-RV32-NEXT: andi a3, a2, 32
+; CHECK-RV32-NEXT: bnez a3, .LBB61_578
+; CHECK-RV32-NEXT: j .LBB61_41
; CHECK-RV32-NEXT: .LBB61_578: # %cond.load145
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4271,7 +4341,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_41
+; CHECK-RV32-NEXT: andi a3, a2, 64
+; CHECK-RV32-NEXT: bnez a3, .LBB61_579
+; CHECK-RV32-NEXT: j .LBB61_42
; CHECK-RV32-NEXT: .LBB61_579: # %cond.load149
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4284,7 +4356,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_42
+; CHECK-RV32-NEXT: andi a3, a2, 128
+; CHECK-RV32-NEXT: bnez a3, .LBB61_580
+; CHECK-RV32-NEXT: j .LBB61_43
; CHECK-RV32-NEXT: .LBB61_580: # %cond.load153
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4297,7 +4371,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_43
+; CHECK-RV32-NEXT: andi a3, a2, 256
+; CHECK-RV32-NEXT: bnez a3, .LBB61_581
+; CHECK-RV32-NEXT: j .LBB61_44
; CHECK-RV32-NEXT: .LBB61_581: # %cond.load157
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4310,7 +4386,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_44
+; CHECK-RV32-NEXT: andi a3, a2, 512
+; CHECK-RV32-NEXT: bnez a3, .LBB61_582
+; CHECK-RV32-NEXT: j .LBB61_45
; CHECK-RV32-NEXT: .LBB61_582: # %cond.load161
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4323,7 +4401,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_45
+; CHECK-RV32-NEXT: andi a3, a2, 1024
+; CHECK-RV32-NEXT: bnez a3, .LBB61_583
+; CHECK-RV32-NEXT: j .LBB61_46
; CHECK-RV32-NEXT: .LBB61_583: # %cond.load165
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4336,7 +4416,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_46
+; CHECK-RV32-NEXT: slli a3, a2, 20
+; CHECK-RV32-NEXT: bltz a3, .LBB61_584
+; CHECK-RV32-NEXT: j .LBB61_47
; CHECK-RV32-NEXT: .LBB61_584: # %cond.load169
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4349,7 +4431,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_47
+; CHECK-RV32-NEXT: slli a3, a2, 19
+; CHECK-RV32-NEXT: bltz a3, .LBB61_585
+; CHECK-RV32-NEXT: j .LBB61_48
; CHECK-RV32-NEXT: .LBB61_585: # %cond.load173
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4362,7 +4446,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_48
+; CHECK-RV32-NEXT: slli a3, a2, 18
+; CHECK-RV32-NEXT: bltz a3, .LBB61_586
+; CHECK-RV32-NEXT: j .LBB61_49
; CHECK-RV32-NEXT: .LBB61_586: # %cond.load177
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4375,7 +4461,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_49
+; CHECK-RV32-NEXT: slli a3, a2, 17
+; CHECK-RV32-NEXT: bltz a3, .LBB61_587
+; CHECK-RV32-NEXT: j .LBB61_50
; CHECK-RV32-NEXT: .LBB61_587: # %cond.load181
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4388,7 +4476,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_50
+; CHECK-RV32-NEXT: slli a3, a2, 16
+; CHECK-RV32-NEXT: bltz a3, .LBB61_588
+; CHECK-RV32-NEXT: j .LBB61_51
; CHECK-RV32-NEXT: .LBB61_588: # %cond.load185
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4401,7 +4491,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_51
+; CHECK-RV32-NEXT: slli a3, a2, 15
+; CHECK-RV32-NEXT: bltz a3, .LBB61_589
+; CHECK-RV32-NEXT: j .LBB61_52
; CHECK-RV32-NEXT: .LBB61_589: # %cond.load189
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4414,7 +4506,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_52
+; CHECK-RV32-NEXT: slli a3, a2, 14
+; CHECK-RV32-NEXT: bltz a3, .LBB61_590
+; CHECK-RV32-NEXT: j .LBB61_53
; CHECK-RV32-NEXT: .LBB61_590: # %cond.load193
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4427,7 +4521,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_53
+; CHECK-RV32-NEXT: slli a3, a2, 13
+; CHECK-RV32-NEXT: bltz a3, .LBB61_591
+; CHECK-RV32-NEXT: j .LBB61_54
; CHECK-RV32-NEXT: .LBB61_591: # %cond.load197
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4440,7 +4536,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_54
+; CHECK-RV32-NEXT: slli a3, a2, 12
+; CHECK-RV32-NEXT: bltz a3, .LBB61_592
+; CHECK-RV32-NEXT: j .LBB61_55
; CHECK-RV32-NEXT: .LBB61_592: # %cond.load201
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4453,7 +4551,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_55
+; CHECK-RV32-NEXT: slli a3, a2, 11
+; CHECK-RV32-NEXT: bltz a3, .LBB61_593
+; CHECK-RV32-NEXT: j .LBB61_56
; CHECK-RV32-NEXT: .LBB61_593: # %cond.load205
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4466,7 +4566,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_56
+; CHECK-RV32-NEXT: slli a3, a2, 10
+; CHECK-RV32-NEXT: bltz a3, .LBB61_594
+; CHECK-RV32-NEXT: j .LBB61_57
; CHECK-RV32-NEXT: .LBB61_594: # %cond.load209
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4479,7 +4581,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_57
+; CHECK-RV32-NEXT: slli a3, a2, 9
+; CHECK-RV32-NEXT: bltz a3, .LBB61_595
+; CHECK-RV32-NEXT: j .LBB61_58
; CHECK-RV32-NEXT: .LBB61_595: # %cond.load213
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4492,7 +4596,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_58
+; CHECK-RV32-NEXT: slli a3, a2, 8
+; CHECK-RV32-NEXT: bltz a3, .LBB61_596
+; CHECK-RV32-NEXT: j .LBB61_59
; CHECK-RV32-NEXT: .LBB61_596: # %cond.load217
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4505,7 +4611,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_59
+; CHECK-RV32-NEXT: slli a3, a2, 7
+; CHECK-RV32-NEXT: bltz a3, .LBB61_597
+; CHECK-RV32-NEXT: j .LBB61_60
; CHECK-RV32-NEXT: .LBB61_597: # %cond.load221
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4518,7 +4626,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_60
+; CHECK-RV32-NEXT: slli a3, a2, 6
+; CHECK-RV32-NEXT: bltz a3, .LBB61_598
+; CHECK-RV32-NEXT: j .LBB61_61
; CHECK-RV32-NEXT: .LBB61_598: # %cond.load225
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4531,7 +4641,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_61
+; CHECK-RV32-NEXT: slli a3, a2, 5
+; CHECK-RV32-NEXT: bltz a3, .LBB61_599
+; CHECK-RV32-NEXT: j .LBB61_62
; CHECK-RV32-NEXT: .LBB61_599: # %cond.load229
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4544,7 +4656,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_62
+; CHECK-RV32-NEXT: slli a3, a2, 4
+; CHECK-RV32-NEXT: bltz a3, .LBB61_600
+; CHECK-RV32-NEXT: j .LBB61_63
; CHECK-RV32-NEXT: .LBB61_600: # %cond.load233
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4557,7 +4671,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_63
+; CHECK-RV32-NEXT: slli a3, a2, 3
+; CHECK-RV32-NEXT: bltz a3, .LBB61_601
+; CHECK-RV32-NEXT: j .LBB61_64
; CHECK-RV32-NEXT: .LBB61_601: # %cond.load237
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4570,7 +4686,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_64
+; CHECK-RV32-NEXT: slli a3, a2, 2
+; CHECK-RV32-NEXT: bgez a3, .LBB61_1026
+; CHECK-RV32-NEXT: j .LBB61_65
+; CHECK-RV32-NEXT: .LBB61_1026: # %cond.load237
+; CHECK-RV32-NEXT: j .LBB61_66
; CHECK-RV32-NEXT: .LBB61_602: # %cond.load249
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -4582,7 +4702,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv1r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_69
+; CHECK-RV32-NEXT: andi a2, a3, 1
+; CHECK-RV32-NEXT: bnez a2, .LBB61_603
+; CHECK-RV32-NEXT: j .LBB61_70
; CHECK-RV32-NEXT: .LBB61_603: # %cond.load253
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4595,7 +4717,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_70
+; CHECK-RV32-NEXT: andi a2, a3, 2
+; CHECK-RV32-NEXT: bnez a2, .LBB61_604
+; CHECK-RV32-NEXT: j .LBB61_71
; CHECK-RV32-NEXT: .LBB61_604: # %cond.load257
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4608,7 +4732,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_71
+; CHECK-RV32-NEXT: andi a2, a3, 4
+; CHECK-RV32-NEXT: bnez a2, .LBB61_605
+; CHECK-RV32-NEXT: j .LBB61_72
; CHECK-RV32-NEXT: .LBB61_605: # %cond.load261
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4621,7 +4747,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_72
+; CHECK-RV32-NEXT: andi a2, a3, 8
+; CHECK-RV32-NEXT: bnez a2, .LBB61_606
+; CHECK-RV32-NEXT: j .LBB61_73
; CHECK-RV32-NEXT: .LBB61_606: # %cond.load265
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4634,7 +4762,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_73
+; CHECK-RV32-NEXT: andi a2, a3, 16
+; CHECK-RV32-NEXT: bnez a2, .LBB61_607
+; CHECK-RV32-NEXT: j .LBB61_74
; CHECK-RV32-NEXT: .LBB61_607: # %cond.load269
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4647,7 +4777,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_74
+; CHECK-RV32-NEXT: andi a2, a3, 32
+; CHECK-RV32-NEXT: bnez a2, .LBB61_608
+; CHECK-RV32-NEXT: j .LBB61_75
; CHECK-RV32-NEXT: .LBB61_608: # %cond.load273
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4660,7 +4792,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_75
+; CHECK-RV32-NEXT: andi a2, a3, 64
+; CHECK-RV32-NEXT: bnez a2, .LBB61_609
+; CHECK-RV32-NEXT: j .LBB61_76
; CHECK-RV32-NEXT: .LBB61_609: # %cond.load277
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4673,7 +4807,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_76
+; CHECK-RV32-NEXT: andi a2, a3, 128
+; CHECK-RV32-NEXT: bnez a2, .LBB61_610
+; CHECK-RV32-NEXT: j .LBB61_77
; CHECK-RV32-NEXT: .LBB61_610: # %cond.load281
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4686,7 +4822,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_77
+; CHECK-RV32-NEXT: andi a2, a3, 256
+; CHECK-RV32-NEXT: bnez a2, .LBB61_611
+; CHECK-RV32-NEXT: j .LBB61_78
; CHECK-RV32-NEXT: .LBB61_611: # %cond.load285
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4699,7 +4837,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_78
+; CHECK-RV32-NEXT: andi a2, a3, 512
+; CHECK-RV32-NEXT: bnez a2, .LBB61_612
+; CHECK-RV32-NEXT: j .LBB61_79
; CHECK-RV32-NEXT: .LBB61_612: # %cond.load289
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4712,7 +4852,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_79
+; CHECK-RV32-NEXT: andi a2, a3, 1024
+; CHECK-RV32-NEXT: bnez a2, .LBB61_613
+; CHECK-RV32-NEXT: j .LBB61_80
; CHECK-RV32-NEXT: .LBB61_613: # %cond.load293
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4725,7 +4867,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_80
+; CHECK-RV32-NEXT: slli a2, a3, 20
+; CHECK-RV32-NEXT: bltz a2, .LBB61_614
+; CHECK-RV32-NEXT: j .LBB61_81
; CHECK-RV32-NEXT: .LBB61_614: # %cond.load297
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4738,7 +4882,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_81
+; CHECK-RV32-NEXT: slli a2, a3, 19
+; CHECK-RV32-NEXT: bltz a2, .LBB61_615
+; CHECK-RV32-NEXT: j .LBB61_82
; CHECK-RV32-NEXT: .LBB61_615: # %cond.load301
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4751,7 +4897,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_82
+; CHECK-RV32-NEXT: slli a2, a3, 18
+; CHECK-RV32-NEXT: bltz a2, .LBB61_616
+; CHECK-RV32-NEXT: j .LBB61_83
; CHECK-RV32-NEXT: .LBB61_616: # %cond.load305
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4764,7 +4912,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_83
+; CHECK-RV32-NEXT: slli a2, a3, 17
+; CHECK-RV32-NEXT: bltz a2, .LBB61_617
+; CHECK-RV32-NEXT: j .LBB61_84
; CHECK-RV32-NEXT: .LBB61_617: # %cond.load309
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4777,7 +4927,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_84
+; CHECK-RV32-NEXT: slli a2, a3, 16
+; CHECK-RV32-NEXT: bltz a2, .LBB61_618
+; CHECK-RV32-NEXT: j .LBB61_85
; CHECK-RV32-NEXT: .LBB61_618: # %cond.load313
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4790,7 +4942,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_85
+; CHECK-RV32-NEXT: slli a2, a3, 15
+; CHECK-RV32-NEXT: bltz a2, .LBB61_619
+; CHECK-RV32-NEXT: j .LBB61_86
; CHECK-RV32-NEXT: .LBB61_619: # %cond.load317
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4803,7 +4957,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_86
+; CHECK-RV32-NEXT: slli a2, a3, 14
+; CHECK-RV32-NEXT: bltz a2, .LBB61_620
+; CHECK-RV32-NEXT: j .LBB61_87
; CHECK-RV32-NEXT: .LBB61_620: # %cond.load321
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4816,7 +4972,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_87
+; CHECK-RV32-NEXT: slli a2, a3, 13
+; CHECK-RV32-NEXT: bltz a2, .LBB61_621
+; CHECK-RV32-NEXT: j .LBB61_88
; CHECK-RV32-NEXT: .LBB61_621: # %cond.load325
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4829,7 +4987,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_88
+; CHECK-RV32-NEXT: slli a2, a3, 12
+; CHECK-RV32-NEXT: bltz a2, .LBB61_622
+; CHECK-RV32-NEXT: j .LBB61_89
; CHECK-RV32-NEXT: .LBB61_622: # %cond.load329
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4842,7 +5002,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_89
+; CHECK-RV32-NEXT: slli a2, a3, 11
+; CHECK-RV32-NEXT: bltz a2, .LBB61_623
+; CHECK-RV32-NEXT: j .LBB61_90
; CHECK-RV32-NEXT: .LBB61_623: # %cond.load333
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4855,7 +5017,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_90
+; CHECK-RV32-NEXT: slli a2, a3, 10
+; CHECK-RV32-NEXT: bltz a2, .LBB61_624
+; CHECK-RV32-NEXT: j .LBB61_91
; CHECK-RV32-NEXT: .LBB61_624: # %cond.load337
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4868,7 +5032,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_91
+; CHECK-RV32-NEXT: slli a2, a3, 9
+; CHECK-RV32-NEXT: bltz a2, .LBB61_625
+; CHECK-RV32-NEXT: j .LBB61_92
; CHECK-RV32-NEXT: .LBB61_625: # %cond.load341
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4881,7 +5047,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_92
+; CHECK-RV32-NEXT: slli a2, a3, 8
+; CHECK-RV32-NEXT: bltz a2, .LBB61_626
+; CHECK-RV32-NEXT: j .LBB61_93
; CHECK-RV32-NEXT: .LBB61_626: # %cond.load345
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4894,7 +5062,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_93
+; CHECK-RV32-NEXT: slli a2, a3, 7
+; CHECK-RV32-NEXT: bltz a2, .LBB61_627
+; CHECK-RV32-NEXT: j .LBB61_94
; CHECK-RV32-NEXT: .LBB61_627: # %cond.load349
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4907,7 +5077,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_94
+; CHECK-RV32-NEXT: slli a2, a3, 6
+; CHECK-RV32-NEXT: bltz a2, .LBB61_628
+; CHECK-RV32-NEXT: j .LBB61_95
; CHECK-RV32-NEXT: .LBB61_628: # %cond.load353
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4920,7 +5092,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_95
+; CHECK-RV32-NEXT: slli a2, a3, 5
+; CHECK-RV32-NEXT: bltz a2, .LBB61_629
+; CHECK-RV32-NEXT: j .LBB61_96
; CHECK-RV32-NEXT: .LBB61_629: # %cond.load357
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4933,7 +5107,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_96
+; CHECK-RV32-NEXT: slli a2, a3, 4
+; CHECK-RV32-NEXT: bltz a2, .LBB61_630
+; CHECK-RV32-NEXT: j .LBB61_97
; CHECK-RV32-NEXT: .LBB61_630: # %cond.load361
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4946,7 +5122,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_97
+; CHECK-RV32-NEXT: slli a2, a3, 3
+; CHECK-RV32-NEXT: bltz a2, .LBB61_631
+; CHECK-RV32-NEXT: j .LBB61_98
; CHECK-RV32-NEXT: .LBB61_631: # %cond.load365
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4959,7 +5137,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_98
+; CHECK-RV32-NEXT: slli a2, a3, 2
+; CHECK-RV32-NEXT: bgez a2, .LBB61_1027
+; CHECK-RV32-NEXT: j .LBB61_99
+; CHECK-RV32-NEXT: .LBB61_1027: # %cond.load365
+; CHECK-RV32-NEXT: j .LBB61_100
; CHECK-RV32-NEXT: .LBB61_632: # %cond.load377
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -4971,7 +5153,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_103
+; CHECK-RV32-NEXT: andi a3, a2, 1
+; CHECK-RV32-NEXT: bnez a3, .LBB61_633
+; CHECK-RV32-NEXT: j .LBB61_104
; CHECK-RV32-NEXT: .LBB61_633: # %cond.load381
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4984,7 +5168,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_104
+; CHECK-RV32-NEXT: andi a3, a2, 2
+; CHECK-RV32-NEXT: bnez a3, .LBB61_634
+; CHECK-RV32-NEXT: j .LBB61_105
; CHECK-RV32-NEXT: .LBB61_634: # %cond.load385
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -4997,7 +5183,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_105
+; CHECK-RV32-NEXT: andi a3, a2, 4
+; CHECK-RV32-NEXT: bnez a3, .LBB61_635
+; CHECK-RV32-NEXT: j .LBB61_106
; CHECK-RV32-NEXT: .LBB61_635: # %cond.load389
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5010,7 +5198,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_106
+; CHECK-RV32-NEXT: andi a3, a2, 8
+; CHECK-RV32-NEXT: bnez a3, .LBB61_636
+; CHECK-RV32-NEXT: j .LBB61_107
; CHECK-RV32-NEXT: .LBB61_636: # %cond.load393
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5023,7 +5213,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_107
+; CHECK-RV32-NEXT: andi a3, a2, 16
+; CHECK-RV32-NEXT: bnez a3, .LBB61_637
+; CHECK-RV32-NEXT: j .LBB61_108
; CHECK-RV32-NEXT: .LBB61_637: # %cond.load397
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5036,7 +5228,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_108
+; CHECK-RV32-NEXT: andi a3, a2, 32
+; CHECK-RV32-NEXT: bnez a3, .LBB61_638
+; CHECK-RV32-NEXT: j .LBB61_109
; CHECK-RV32-NEXT: .LBB61_638: # %cond.load401
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5049,7 +5243,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_109
+; CHECK-RV32-NEXT: andi a3, a2, 64
+; CHECK-RV32-NEXT: bnez a3, .LBB61_639
+; CHECK-RV32-NEXT: j .LBB61_110
; CHECK-RV32-NEXT: .LBB61_639: # %cond.load405
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5062,7 +5258,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_110
+; CHECK-RV32-NEXT: andi a3, a2, 128
+; CHECK-RV32-NEXT: bnez a3, .LBB61_640
+; CHECK-RV32-NEXT: j .LBB61_111
; CHECK-RV32-NEXT: .LBB61_640: # %cond.load409
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5075,7 +5273,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_111
+; CHECK-RV32-NEXT: andi a3, a2, 256
+; CHECK-RV32-NEXT: bnez a3, .LBB61_641
+; CHECK-RV32-NEXT: j .LBB61_112
; CHECK-RV32-NEXT: .LBB61_641: # %cond.load413
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5088,7 +5288,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_112
+; CHECK-RV32-NEXT: andi a3, a2, 512
+; CHECK-RV32-NEXT: bnez a3, .LBB61_642
+; CHECK-RV32-NEXT: j .LBB61_113
; CHECK-RV32-NEXT: .LBB61_642: # %cond.load417
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5101,7 +5303,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_113
+; CHECK-RV32-NEXT: andi a3, a2, 1024
+; CHECK-RV32-NEXT: bnez a3, .LBB61_643
+; CHECK-RV32-NEXT: j .LBB61_114
; CHECK-RV32-NEXT: .LBB61_643: # %cond.load421
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5114,7 +5318,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_114
+; CHECK-RV32-NEXT: slli a3, a2, 20
+; CHECK-RV32-NEXT: bltz a3, .LBB61_644
+; CHECK-RV32-NEXT: j .LBB61_115
; CHECK-RV32-NEXT: .LBB61_644: # %cond.load425
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5127,7 +5333,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_115
+; CHECK-RV32-NEXT: slli a3, a2, 19
+; CHECK-RV32-NEXT: bltz a3, .LBB61_645
+; CHECK-RV32-NEXT: j .LBB61_116
; CHECK-RV32-NEXT: .LBB61_645: # %cond.load429
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5140,7 +5348,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_116
+; CHECK-RV32-NEXT: slli a3, a2, 18
+; CHECK-RV32-NEXT: bltz a3, .LBB61_646
+; CHECK-RV32-NEXT: j .LBB61_117
; CHECK-RV32-NEXT: .LBB61_646: # %cond.load433
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5153,7 +5363,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_117
+; CHECK-RV32-NEXT: slli a3, a2, 17
+; CHECK-RV32-NEXT: bltz a3, .LBB61_647
+; CHECK-RV32-NEXT: j .LBB61_118
; CHECK-RV32-NEXT: .LBB61_647: # %cond.load437
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5166,7 +5378,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_118
+; CHECK-RV32-NEXT: slli a3, a2, 16
+; CHECK-RV32-NEXT: bltz a3, .LBB61_648
+; CHECK-RV32-NEXT: j .LBB61_119
; CHECK-RV32-NEXT: .LBB61_648: # %cond.load441
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5179,7 +5393,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_119
+; CHECK-RV32-NEXT: slli a3, a2, 15
+; CHECK-RV32-NEXT: bltz a3, .LBB61_649
+; CHECK-RV32-NEXT: j .LBB61_120
; CHECK-RV32-NEXT: .LBB61_649: # %cond.load445
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5192,7 +5408,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_120
+; CHECK-RV32-NEXT: slli a3, a2, 14
+; CHECK-RV32-NEXT: bltz a3, .LBB61_650
+; CHECK-RV32-NEXT: j .LBB61_121
; CHECK-RV32-NEXT: .LBB61_650: # %cond.load449
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5205,7 +5423,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_121
+; CHECK-RV32-NEXT: slli a3, a2, 13
+; CHECK-RV32-NEXT: bltz a3, .LBB61_651
+; CHECK-RV32-NEXT: j .LBB61_122
; CHECK-RV32-NEXT: .LBB61_651: # %cond.load453
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5218,7 +5438,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_122
+; CHECK-RV32-NEXT: slli a3, a2, 12
+; CHECK-RV32-NEXT: bltz a3, .LBB61_652
+; CHECK-RV32-NEXT: j .LBB61_123
; CHECK-RV32-NEXT: .LBB61_652: # %cond.load457
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5231,7 +5453,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_123
+; CHECK-RV32-NEXT: slli a3, a2, 11
+; CHECK-RV32-NEXT: bltz a3, .LBB61_653
+; CHECK-RV32-NEXT: j .LBB61_124
; CHECK-RV32-NEXT: .LBB61_653: # %cond.load461
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5244,7 +5468,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_124
+; CHECK-RV32-NEXT: slli a3, a2, 10
+; CHECK-RV32-NEXT: bltz a3, .LBB61_654
+; CHECK-RV32-NEXT: j .LBB61_125
; CHECK-RV32-NEXT: .LBB61_654: # %cond.load465
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5257,7 +5483,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_125
+; CHECK-RV32-NEXT: slli a3, a2, 9
+; CHECK-RV32-NEXT: bltz a3, .LBB61_655
+; CHECK-RV32-NEXT: j .LBB61_126
; CHECK-RV32-NEXT: .LBB61_655: # %cond.load469
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5270,7 +5498,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_126
+; CHECK-RV32-NEXT: slli a3, a2, 8
+; CHECK-RV32-NEXT: bltz a3, .LBB61_656
+; CHECK-RV32-NEXT: j .LBB61_127
; CHECK-RV32-NEXT: .LBB61_656: # %cond.load473
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5283,7 +5513,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_127
+; CHECK-RV32-NEXT: slli a3, a2, 7
+; CHECK-RV32-NEXT: bltz a3, .LBB61_657
+; CHECK-RV32-NEXT: j .LBB61_128
; CHECK-RV32-NEXT: .LBB61_657: # %cond.load477
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5296,7 +5528,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_128
+; CHECK-RV32-NEXT: slli a3, a2, 6
+; CHECK-RV32-NEXT: bltz a3, .LBB61_658
+; CHECK-RV32-NEXT: j .LBB61_129
; CHECK-RV32-NEXT: .LBB61_658: # %cond.load481
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5309,7 +5543,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_129
+; CHECK-RV32-NEXT: slli a3, a2, 5
+; CHECK-RV32-NEXT: bltz a3, .LBB61_659
+; CHECK-RV32-NEXT: j .LBB61_130
; CHECK-RV32-NEXT: .LBB61_659: # %cond.load485
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5322,7 +5558,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_130
+; CHECK-RV32-NEXT: slli a3, a2, 4
+; CHECK-RV32-NEXT: bltz a3, .LBB61_660
+; CHECK-RV32-NEXT: j .LBB61_131
; CHECK-RV32-NEXT: .LBB61_660: # %cond.load489
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5335,7 +5573,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_131
+; CHECK-RV32-NEXT: slli a3, a2, 3
+; CHECK-RV32-NEXT: bltz a3, .LBB61_661
+; CHECK-RV32-NEXT: j .LBB61_132
; CHECK-RV32-NEXT: .LBB61_661: # %cond.load493
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5348,7 +5588,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_132
+; CHECK-RV32-NEXT: slli a3, a2, 2
+; CHECK-RV32-NEXT: bgez a3, .LBB61_1028
+; CHECK-RV32-NEXT: j .LBB61_133
+; CHECK-RV32-NEXT: .LBB61_1028: # %cond.load493
+; CHECK-RV32-NEXT: j .LBB61_134
; CHECK-RV32-NEXT: .LBB61_662: # %cond.load505
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -5360,7 +5604,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv2r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_137
+; CHECK-RV32-NEXT: andi a2, a3, 1
+; CHECK-RV32-NEXT: bnez a2, .LBB61_663
+; CHECK-RV32-NEXT: j .LBB61_138
; CHECK-RV32-NEXT: .LBB61_663: # %cond.load509
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5373,7 +5619,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_138
+; CHECK-RV32-NEXT: andi a2, a3, 2
+; CHECK-RV32-NEXT: bnez a2, .LBB61_664
+; CHECK-RV32-NEXT: j .LBB61_139
; CHECK-RV32-NEXT: .LBB61_664: # %cond.load513
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5386,7 +5634,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_139
+; CHECK-RV32-NEXT: andi a2, a3, 4
+; CHECK-RV32-NEXT: bnez a2, .LBB61_665
+; CHECK-RV32-NEXT: j .LBB61_140
; CHECK-RV32-NEXT: .LBB61_665: # %cond.load517
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5399,7 +5649,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_140
+; CHECK-RV32-NEXT: andi a2, a3, 8
+; CHECK-RV32-NEXT: bnez a2, .LBB61_666
+; CHECK-RV32-NEXT: j .LBB61_141
; CHECK-RV32-NEXT: .LBB61_666: # %cond.load521
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5412,7 +5664,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_141
+; CHECK-RV32-NEXT: andi a2, a3, 16
+; CHECK-RV32-NEXT: bnez a2, .LBB61_667
+; CHECK-RV32-NEXT: j .LBB61_142
; CHECK-RV32-NEXT: .LBB61_667: # %cond.load525
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5425,7 +5679,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_142
+; CHECK-RV32-NEXT: andi a2, a3, 32
+; CHECK-RV32-NEXT: bnez a2, .LBB61_668
+; CHECK-RV32-NEXT: j .LBB61_143
; CHECK-RV32-NEXT: .LBB61_668: # %cond.load529
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5438,7 +5694,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_143
+; CHECK-RV32-NEXT: andi a2, a3, 64
+; CHECK-RV32-NEXT: bnez a2, .LBB61_669
+; CHECK-RV32-NEXT: j .LBB61_144
; CHECK-RV32-NEXT: .LBB61_669: # %cond.load533
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5451,7 +5709,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_144
+; CHECK-RV32-NEXT: andi a2, a3, 128
+; CHECK-RV32-NEXT: bnez a2, .LBB61_670
+; CHECK-RV32-NEXT: j .LBB61_145
; CHECK-RV32-NEXT: .LBB61_670: # %cond.load537
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5464,7 +5724,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_145
+; CHECK-RV32-NEXT: andi a2, a3, 256
+; CHECK-RV32-NEXT: bnez a2, .LBB61_671
+; CHECK-RV32-NEXT: j .LBB61_146
; CHECK-RV32-NEXT: .LBB61_671: # %cond.load541
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5477,7 +5739,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_146
+; CHECK-RV32-NEXT: andi a2, a3, 512
+; CHECK-RV32-NEXT: bnez a2, .LBB61_672
+; CHECK-RV32-NEXT: j .LBB61_147
; CHECK-RV32-NEXT: .LBB61_672: # %cond.load545
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5490,7 +5754,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_147
+; CHECK-RV32-NEXT: andi a2, a3, 1024
+; CHECK-RV32-NEXT: bnez a2, .LBB61_673
+; CHECK-RV32-NEXT: j .LBB61_148
; CHECK-RV32-NEXT: .LBB61_673: # %cond.load549
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5503,7 +5769,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_148
+; CHECK-RV32-NEXT: slli a2, a3, 20
+; CHECK-RV32-NEXT: bltz a2, .LBB61_674
+; CHECK-RV32-NEXT: j .LBB61_149
; CHECK-RV32-NEXT: .LBB61_674: # %cond.load553
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5516,7 +5784,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_149
+; CHECK-RV32-NEXT: slli a2, a3, 19
+; CHECK-RV32-NEXT: bltz a2, .LBB61_675
+; CHECK-RV32-NEXT: j .LBB61_150
; CHECK-RV32-NEXT: .LBB61_675: # %cond.load557
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5529,7 +5799,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_150
+; CHECK-RV32-NEXT: slli a2, a3, 18
+; CHECK-RV32-NEXT: bltz a2, .LBB61_676
+; CHECK-RV32-NEXT: j .LBB61_151
; CHECK-RV32-NEXT: .LBB61_676: # %cond.load561
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5542,7 +5814,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_151
+; CHECK-RV32-NEXT: slli a2, a3, 17
+; CHECK-RV32-NEXT: bltz a2, .LBB61_677
+; CHECK-RV32-NEXT: j .LBB61_152
; CHECK-RV32-NEXT: .LBB61_677: # %cond.load565
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5555,7 +5829,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_152
+; CHECK-RV32-NEXT: slli a2, a3, 16
+; CHECK-RV32-NEXT: bltz a2, .LBB61_678
+; CHECK-RV32-NEXT: j .LBB61_153
; CHECK-RV32-NEXT: .LBB61_678: # %cond.load569
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5568,7 +5844,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_153
+; CHECK-RV32-NEXT: slli a2, a3, 15
+; CHECK-RV32-NEXT: bltz a2, .LBB61_679
+; CHECK-RV32-NEXT: j .LBB61_154
; CHECK-RV32-NEXT: .LBB61_679: # %cond.load573
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5581,7 +5859,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_154
+; CHECK-RV32-NEXT: slli a2, a3, 14
+; CHECK-RV32-NEXT: bltz a2, .LBB61_680
+; CHECK-RV32-NEXT: j .LBB61_155
; CHECK-RV32-NEXT: .LBB61_680: # %cond.load577
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5594,7 +5874,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_155
+; CHECK-RV32-NEXT: slli a2, a3, 13
+; CHECK-RV32-NEXT: bltz a2, .LBB61_681
+; CHECK-RV32-NEXT: j .LBB61_156
; CHECK-RV32-NEXT: .LBB61_681: # %cond.load581
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5607,7 +5889,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_156
+; CHECK-RV32-NEXT: slli a2, a3, 12
+; CHECK-RV32-NEXT: bltz a2, .LBB61_682
+; CHECK-RV32-NEXT: j .LBB61_157
; CHECK-RV32-NEXT: .LBB61_682: # %cond.load585
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5620,7 +5904,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_157
+; CHECK-RV32-NEXT: slli a2, a3, 11
+; CHECK-RV32-NEXT: bltz a2, .LBB61_683
+; CHECK-RV32-NEXT: j .LBB61_158
; CHECK-RV32-NEXT: .LBB61_683: # %cond.load589
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5633,7 +5919,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_158
+; CHECK-RV32-NEXT: slli a2, a3, 10
+; CHECK-RV32-NEXT: bltz a2, .LBB61_684
+; CHECK-RV32-NEXT: j .LBB61_159
; CHECK-RV32-NEXT: .LBB61_684: # %cond.load593
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5646,7 +5934,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_159
+; CHECK-RV32-NEXT: slli a2, a3, 9
+; CHECK-RV32-NEXT: bltz a2, .LBB61_685
+; CHECK-RV32-NEXT: j .LBB61_160
; CHECK-RV32-NEXT: .LBB61_685: # %cond.load597
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5659,7 +5949,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_160
+; CHECK-RV32-NEXT: slli a2, a3, 8
+; CHECK-RV32-NEXT: bltz a2, .LBB61_686
+; CHECK-RV32-NEXT: j .LBB61_161
; CHECK-RV32-NEXT: .LBB61_686: # %cond.load601
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5672,7 +5964,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_161
+; CHECK-RV32-NEXT: slli a2, a3, 7
+; CHECK-RV32-NEXT: bltz a2, .LBB61_687
+; CHECK-RV32-NEXT: j .LBB61_162
; CHECK-RV32-NEXT: .LBB61_687: # %cond.load605
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5685,7 +5979,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_162
+; CHECK-RV32-NEXT: slli a2, a3, 6
+; CHECK-RV32-NEXT: bltz a2, .LBB61_688
+; CHECK-RV32-NEXT: j .LBB61_163
; CHECK-RV32-NEXT: .LBB61_688: # %cond.load609
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5698,7 +5994,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_163
+; CHECK-RV32-NEXT: slli a2, a3, 5
+; CHECK-RV32-NEXT: bltz a2, .LBB61_689
+; CHECK-RV32-NEXT: j .LBB61_164
; CHECK-RV32-NEXT: .LBB61_689: # %cond.load613
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5711,7 +6009,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_164
+; CHECK-RV32-NEXT: slli a2, a3, 4
+; CHECK-RV32-NEXT: bltz a2, .LBB61_690
+; CHECK-RV32-NEXT: j .LBB61_165
; CHECK-RV32-NEXT: .LBB61_690: # %cond.load617
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5724,7 +6024,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_165
+; CHECK-RV32-NEXT: slli a2, a3, 3
+; CHECK-RV32-NEXT: bltz a2, .LBB61_691
+; CHECK-RV32-NEXT: j .LBB61_166
; CHECK-RV32-NEXT: .LBB61_691: # %cond.load621
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5737,7 +6039,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_166
+; CHECK-RV32-NEXT: slli a2, a3, 2
+; CHECK-RV32-NEXT: bgez a2, .LBB61_1029
+; CHECK-RV32-NEXT: j .LBB61_167
+; CHECK-RV32-NEXT: .LBB61_1029: # %cond.load621
+; CHECK-RV32-NEXT: j .LBB61_168
; CHECK-RV32-NEXT: .LBB61_692: # %cond.load633
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -5749,7 +6055,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_171
+; CHECK-RV32-NEXT: andi a3, a2, 1
+; CHECK-RV32-NEXT: bnez a3, .LBB61_693
+; CHECK-RV32-NEXT: j .LBB61_172
; CHECK-RV32-NEXT: .LBB61_693: # %cond.load637
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5762,7 +6070,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_172
+; CHECK-RV32-NEXT: andi a3, a2, 2
+; CHECK-RV32-NEXT: bnez a3, .LBB61_694
+; CHECK-RV32-NEXT: j .LBB61_173
; CHECK-RV32-NEXT: .LBB61_694: # %cond.load641
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5775,7 +6085,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_173
+; CHECK-RV32-NEXT: andi a3, a2, 4
+; CHECK-RV32-NEXT: bnez a3, .LBB61_695
+; CHECK-RV32-NEXT: j .LBB61_174
; CHECK-RV32-NEXT: .LBB61_695: # %cond.load645
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5788,7 +6100,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_174
+; CHECK-RV32-NEXT: andi a3, a2, 8
+; CHECK-RV32-NEXT: bnez a3, .LBB61_696
+; CHECK-RV32-NEXT: j .LBB61_175
; CHECK-RV32-NEXT: .LBB61_696: # %cond.load649
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5801,7 +6115,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_175
+; CHECK-RV32-NEXT: andi a3, a2, 16
+; CHECK-RV32-NEXT: bnez a3, .LBB61_697
+; CHECK-RV32-NEXT: j .LBB61_176
; CHECK-RV32-NEXT: .LBB61_697: # %cond.load653
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5814,7 +6130,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_176
+; CHECK-RV32-NEXT: andi a3, a2, 32
+; CHECK-RV32-NEXT: bnez a3, .LBB61_698
+; CHECK-RV32-NEXT: j .LBB61_177
; CHECK-RV32-NEXT: .LBB61_698: # %cond.load657
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5827,7 +6145,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_177
+; CHECK-RV32-NEXT: andi a3, a2, 64
+; CHECK-RV32-NEXT: bnez a3, .LBB61_699
+; CHECK-RV32-NEXT: j .LBB61_178
; CHECK-RV32-NEXT: .LBB61_699: # %cond.load661
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5840,7 +6160,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_178
+; CHECK-RV32-NEXT: andi a3, a2, 128
+; CHECK-RV32-NEXT: bnez a3, .LBB61_700
+; CHECK-RV32-NEXT: j .LBB61_179
; CHECK-RV32-NEXT: .LBB61_700: # %cond.load665
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5853,7 +6175,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_179
+; CHECK-RV32-NEXT: andi a3, a2, 256
+; CHECK-RV32-NEXT: bnez a3, .LBB61_701
+; CHECK-RV32-NEXT: j .LBB61_180
; CHECK-RV32-NEXT: .LBB61_701: # %cond.load669
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5866,7 +6190,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_180
+; CHECK-RV32-NEXT: andi a3, a2, 512
+; CHECK-RV32-NEXT: bnez a3, .LBB61_702
+; CHECK-RV32-NEXT: j .LBB61_181
; CHECK-RV32-NEXT: .LBB61_702: # %cond.load673
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5879,7 +6205,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_181
+; CHECK-RV32-NEXT: andi a3, a2, 1024
+; CHECK-RV32-NEXT: bnez a3, .LBB61_703
+; CHECK-RV32-NEXT: j .LBB61_182
; CHECK-RV32-NEXT: .LBB61_703: # %cond.load677
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5892,7 +6220,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_182
+; CHECK-RV32-NEXT: slli a3, a2, 20
+; CHECK-RV32-NEXT: bltz a3, .LBB61_704
+; CHECK-RV32-NEXT: j .LBB61_183
; CHECK-RV32-NEXT: .LBB61_704: # %cond.load681
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5905,7 +6235,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_183
+; CHECK-RV32-NEXT: slli a3, a2, 19
+; CHECK-RV32-NEXT: bltz a3, .LBB61_705
+; CHECK-RV32-NEXT: j .LBB61_184
; CHECK-RV32-NEXT: .LBB61_705: # %cond.load685
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5918,7 +6250,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_184
+; CHECK-RV32-NEXT: slli a3, a2, 18
+; CHECK-RV32-NEXT: bltz a3, .LBB61_706
+; CHECK-RV32-NEXT: j .LBB61_185
; CHECK-RV32-NEXT: .LBB61_706: # %cond.load689
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5931,7 +6265,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_185
+; CHECK-RV32-NEXT: slli a3, a2, 17
+; CHECK-RV32-NEXT: bltz a3, .LBB61_707
+; CHECK-RV32-NEXT: j .LBB61_186
; CHECK-RV32-NEXT: .LBB61_707: # %cond.load693
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5944,7 +6280,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_186
+; CHECK-RV32-NEXT: slli a3, a2, 16
+; CHECK-RV32-NEXT: bltz a3, .LBB61_708
+; CHECK-RV32-NEXT: j .LBB61_187
; CHECK-RV32-NEXT: .LBB61_708: # %cond.load697
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5957,7 +6295,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_187
+; CHECK-RV32-NEXT: slli a3, a2, 15
+; CHECK-RV32-NEXT: bltz a3, .LBB61_709
+; CHECK-RV32-NEXT: j .LBB61_188
; CHECK-RV32-NEXT: .LBB61_709: # %cond.load701
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5970,7 +6310,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_188
+; CHECK-RV32-NEXT: slli a3, a2, 14
+; CHECK-RV32-NEXT: bltz a3, .LBB61_710
+; CHECK-RV32-NEXT: j .LBB61_189
; CHECK-RV32-NEXT: .LBB61_710: # %cond.load705
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5983,7 +6325,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_189
+; CHECK-RV32-NEXT: slli a3, a2, 13
+; CHECK-RV32-NEXT: bltz a3, .LBB61_711
+; CHECK-RV32-NEXT: j .LBB61_190
; CHECK-RV32-NEXT: .LBB61_711: # %cond.load709
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -5996,7 +6340,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_190
+; CHECK-RV32-NEXT: slli a3, a2, 12
+; CHECK-RV32-NEXT: bltz a3, .LBB61_712
+; CHECK-RV32-NEXT: j .LBB61_191
; CHECK-RV32-NEXT: .LBB61_712: # %cond.load713
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6009,7 +6355,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_191
+; CHECK-RV32-NEXT: slli a3, a2, 11
+; CHECK-RV32-NEXT: bltz a3, .LBB61_713
+; CHECK-RV32-NEXT: j .LBB61_192
; CHECK-RV32-NEXT: .LBB61_713: # %cond.load717
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6022,7 +6370,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_192
+; CHECK-RV32-NEXT: slli a3, a2, 10
+; CHECK-RV32-NEXT: bltz a3, .LBB61_714
+; CHECK-RV32-NEXT: j .LBB61_193
; CHECK-RV32-NEXT: .LBB61_714: # %cond.load721
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6035,7 +6385,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_193
+; CHECK-RV32-NEXT: slli a3, a2, 9
+; CHECK-RV32-NEXT: bltz a3, .LBB61_715
+; CHECK-RV32-NEXT: j .LBB61_194
; CHECK-RV32-NEXT: .LBB61_715: # %cond.load725
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6048,7 +6400,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_194
+; CHECK-RV32-NEXT: slli a3, a2, 8
+; CHECK-RV32-NEXT: bltz a3, .LBB61_716
+; CHECK-RV32-NEXT: j .LBB61_195
; CHECK-RV32-NEXT: .LBB61_716: # %cond.load729
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6061,7 +6415,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_195
+; CHECK-RV32-NEXT: slli a3, a2, 7
+; CHECK-RV32-NEXT: bltz a3, .LBB61_717
+; CHECK-RV32-NEXT: j .LBB61_196
; CHECK-RV32-NEXT: .LBB61_717: # %cond.load733
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6074,7 +6430,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_196
+; CHECK-RV32-NEXT: slli a3, a2, 6
+; CHECK-RV32-NEXT: bltz a3, .LBB61_718
+; CHECK-RV32-NEXT: j .LBB61_197
; CHECK-RV32-NEXT: .LBB61_718: # %cond.load737
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6087,7 +6445,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_197
+; CHECK-RV32-NEXT: slli a3, a2, 5
+; CHECK-RV32-NEXT: bltz a3, .LBB61_719
+; CHECK-RV32-NEXT: j .LBB61_198
; CHECK-RV32-NEXT: .LBB61_719: # %cond.load741
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6100,7 +6460,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_198
+; CHECK-RV32-NEXT: slli a3, a2, 4
+; CHECK-RV32-NEXT: bltz a3, .LBB61_720
+; CHECK-RV32-NEXT: j .LBB61_199
; CHECK-RV32-NEXT: .LBB61_720: # %cond.load745
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6113,7 +6475,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_199
+; CHECK-RV32-NEXT: slli a3, a2, 3
+; CHECK-RV32-NEXT: bltz a3, .LBB61_721
+; CHECK-RV32-NEXT: j .LBB61_200
; CHECK-RV32-NEXT: .LBB61_721: # %cond.load749
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6126,7 +6490,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_200
+; CHECK-RV32-NEXT: slli a3, a2, 2
+; CHECK-RV32-NEXT: bgez a3, .LBB61_1030
+; CHECK-RV32-NEXT: j .LBB61_201
+; CHECK-RV32-NEXT: .LBB61_1030: # %cond.load749
+; CHECK-RV32-NEXT: j .LBB61_202
; CHECK-RV32-NEXT: .LBB61_722: # %cond.load761
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -6138,7 +6506,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_205
+; CHECK-RV32-NEXT: andi a2, a3, 1
+; CHECK-RV32-NEXT: bnez a2, .LBB61_723
+; CHECK-RV32-NEXT: j .LBB61_206
; CHECK-RV32-NEXT: .LBB61_723: # %cond.load765
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6151,7 +6521,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_206
+; CHECK-RV32-NEXT: andi a2, a3, 2
+; CHECK-RV32-NEXT: bnez a2, .LBB61_724
+; CHECK-RV32-NEXT: j .LBB61_207
; CHECK-RV32-NEXT: .LBB61_724: # %cond.load769
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6164,7 +6536,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_207
+; CHECK-RV32-NEXT: andi a2, a3, 4
+; CHECK-RV32-NEXT: bnez a2, .LBB61_725
+; CHECK-RV32-NEXT: j .LBB61_208
; CHECK-RV32-NEXT: .LBB61_725: # %cond.load773
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6177,7 +6551,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_208
+; CHECK-RV32-NEXT: andi a2, a3, 8
+; CHECK-RV32-NEXT: bnez a2, .LBB61_726
+; CHECK-RV32-NEXT: j .LBB61_209
; CHECK-RV32-NEXT: .LBB61_726: # %cond.load777
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6190,7 +6566,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_209
+; CHECK-RV32-NEXT: andi a2, a3, 16
+; CHECK-RV32-NEXT: bnez a2, .LBB61_727
+; CHECK-RV32-NEXT: j .LBB61_210
; CHECK-RV32-NEXT: .LBB61_727: # %cond.load781
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6203,7 +6581,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_210
+; CHECK-RV32-NEXT: andi a2, a3, 32
+; CHECK-RV32-NEXT: bnez a2, .LBB61_728
+; CHECK-RV32-NEXT: j .LBB61_211
; CHECK-RV32-NEXT: .LBB61_728: # %cond.load785
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6216,7 +6596,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_211
+; CHECK-RV32-NEXT: andi a2, a3, 64
+; CHECK-RV32-NEXT: bnez a2, .LBB61_729
+; CHECK-RV32-NEXT: j .LBB61_212
; CHECK-RV32-NEXT: .LBB61_729: # %cond.load789
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6229,7 +6611,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_212
+; CHECK-RV32-NEXT: andi a2, a3, 128
+; CHECK-RV32-NEXT: bnez a2, .LBB61_730
+; CHECK-RV32-NEXT: j .LBB61_213
; CHECK-RV32-NEXT: .LBB61_730: # %cond.load793
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6242,7 +6626,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_213
+; CHECK-RV32-NEXT: andi a2, a3, 256
+; CHECK-RV32-NEXT: bnez a2, .LBB61_731
+; CHECK-RV32-NEXT: j .LBB61_214
; CHECK-RV32-NEXT: .LBB61_731: # %cond.load797
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6255,7 +6641,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_214
+; CHECK-RV32-NEXT: andi a2, a3, 512
+; CHECK-RV32-NEXT: bnez a2, .LBB61_732
+; CHECK-RV32-NEXT: j .LBB61_215
; CHECK-RV32-NEXT: .LBB61_732: # %cond.load801
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6268,7 +6656,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_215
+; CHECK-RV32-NEXT: andi a2, a3, 1024
+; CHECK-RV32-NEXT: bnez a2, .LBB61_733
+; CHECK-RV32-NEXT: j .LBB61_216
; CHECK-RV32-NEXT: .LBB61_733: # %cond.load805
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6281,7 +6671,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_216
+; CHECK-RV32-NEXT: slli a2, a3, 20
+; CHECK-RV32-NEXT: bltz a2, .LBB61_734
+; CHECK-RV32-NEXT: j .LBB61_217
; CHECK-RV32-NEXT: .LBB61_734: # %cond.load809
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6294,7 +6686,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_217
+; CHECK-RV32-NEXT: slli a2, a3, 19
+; CHECK-RV32-NEXT: bltz a2, .LBB61_735
+; CHECK-RV32-NEXT: j .LBB61_218
; CHECK-RV32-NEXT: .LBB61_735: # %cond.load813
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6307,7 +6701,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_218
+; CHECK-RV32-NEXT: slli a2, a3, 18
+; CHECK-RV32-NEXT: bltz a2, .LBB61_736
+; CHECK-RV32-NEXT: j .LBB61_219
; CHECK-RV32-NEXT: .LBB61_736: # %cond.load817
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6320,7 +6716,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_219
+; CHECK-RV32-NEXT: slli a2, a3, 17
+; CHECK-RV32-NEXT: bltz a2, .LBB61_737
+; CHECK-RV32-NEXT: j .LBB61_220
; CHECK-RV32-NEXT: .LBB61_737: # %cond.load821
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6333,7 +6731,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_220
+; CHECK-RV32-NEXT: slli a2, a3, 16
+; CHECK-RV32-NEXT: bltz a2, .LBB61_738
+; CHECK-RV32-NEXT: j .LBB61_221
; CHECK-RV32-NEXT: .LBB61_738: # %cond.load825
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6346,7 +6746,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_221
+; CHECK-RV32-NEXT: slli a2, a3, 15
+; CHECK-RV32-NEXT: bltz a2, .LBB61_739
+; CHECK-RV32-NEXT: j .LBB61_222
; CHECK-RV32-NEXT: .LBB61_739: # %cond.load829
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6359,7 +6761,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_222
+; CHECK-RV32-NEXT: slli a2, a3, 14
+; CHECK-RV32-NEXT: bltz a2, .LBB61_740
+; CHECK-RV32-NEXT: j .LBB61_223
; CHECK-RV32-NEXT: .LBB61_740: # %cond.load833
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6372,7 +6776,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_223
+; CHECK-RV32-NEXT: slli a2, a3, 13
+; CHECK-RV32-NEXT: bltz a2, .LBB61_741
+; CHECK-RV32-NEXT: j .LBB61_224
; CHECK-RV32-NEXT: .LBB61_741: # %cond.load837
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6385,7 +6791,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_224
+; CHECK-RV32-NEXT: slli a2, a3, 12
+; CHECK-RV32-NEXT: bltz a2, .LBB61_742
+; CHECK-RV32-NEXT: j .LBB61_225
; CHECK-RV32-NEXT: .LBB61_742: # %cond.load841
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6398,7 +6806,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_225
+; CHECK-RV32-NEXT: slli a2, a3, 11
+; CHECK-RV32-NEXT: bltz a2, .LBB61_743
+; CHECK-RV32-NEXT: j .LBB61_226
; CHECK-RV32-NEXT: .LBB61_743: # %cond.load845
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6411,7 +6821,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_226
+; CHECK-RV32-NEXT: slli a2, a3, 10
+; CHECK-RV32-NEXT: bltz a2, .LBB61_744
+; CHECK-RV32-NEXT: j .LBB61_227
; CHECK-RV32-NEXT: .LBB61_744: # %cond.load849
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6424,7 +6836,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_227
+; CHECK-RV32-NEXT: slli a2, a3, 9
+; CHECK-RV32-NEXT: bltz a2, .LBB61_745
+; CHECK-RV32-NEXT: j .LBB61_228
; CHECK-RV32-NEXT: .LBB61_745: # %cond.load853
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6437,7 +6851,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_228
+; CHECK-RV32-NEXT: slli a2, a3, 8
+; CHECK-RV32-NEXT: bltz a2, .LBB61_746
+; CHECK-RV32-NEXT: j .LBB61_229
; CHECK-RV32-NEXT: .LBB61_746: # %cond.load857
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6450,7 +6866,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_229
+; CHECK-RV32-NEXT: slli a2, a3, 7
+; CHECK-RV32-NEXT: bltz a2, .LBB61_747
+; CHECK-RV32-NEXT: j .LBB61_230
; CHECK-RV32-NEXT: .LBB61_747: # %cond.load861
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6463,7 +6881,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_230
+; CHECK-RV32-NEXT: slli a2, a3, 6
+; CHECK-RV32-NEXT: bltz a2, .LBB61_748
+; CHECK-RV32-NEXT: j .LBB61_231
; CHECK-RV32-NEXT: .LBB61_748: # %cond.load865
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6476,7 +6896,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_231
+; CHECK-RV32-NEXT: slli a2, a3, 5
+; CHECK-RV32-NEXT: bltz a2, .LBB61_749
+; CHECK-RV32-NEXT: j .LBB61_232
; CHECK-RV32-NEXT: .LBB61_749: # %cond.load869
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6489,7 +6911,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_232
+; CHECK-RV32-NEXT: slli a2, a3, 4
+; CHECK-RV32-NEXT: bltz a2, .LBB61_750
+; CHECK-RV32-NEXT: j .LBB61_233
; CHECK-RV32-NEXT: .LBB61_750: # %cond.load873
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6502,7 +6926,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_233
+; CHECK-RV32-NEXT: slli a2, a3, 3
+; CHECK-RV32-NEXT: bltz a2, .LBB61_751
+; CHECK-RV32-NEXT: j .LBB61_234
; CHECK-RV32-NEXT: .LBB61_751: # %cond.load877
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6515,7 +6941,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_234
+; CHECK-RV32-NEXT: slli a2, a3, 2
+; CHECK-RV32-NEXT: bgez a2, .LBB61_1031
+; CHECK-RV32-NEXT: j .LBB61_235
+; CHECK-RV32-NEXT: .LBB61_1031: # %cond.load877
+; CHECK-RV32-NEXT: j .LBB61_236
; CHECK-RV32-NEXT: .LBB61_752: # %cond.load889
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -6527,7 +6957,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_239
+; CHECK-RV32-NEXT: andi a3, a2, 1
+; CHECK-RV32-NEXT: bnez a3, .LBB61_753
+; CHECK-RV32-NEXT: j .LBB61_240
; CHECK-RV32-NEXT: .LBB61_753: # %cond.load893
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6540,7 +6972,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_240
+; CHECK-RV32-NEXT: andi a3, a2, 2
+; CHECK-RV32-NEXT: bnez a3, .LBB61_754
+; CHECK-RV32-NEXT: j .LBB61_241
; CHECK-RV32-NEXT: .LBB61_754: # %cond.load897
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6553,7 +6987,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_241
+; CHECK-RV32-NEXT: andi a3, a2, 4
+; CHECK-RV32-NEXT: bnez a3, .LBB61_755
+; CHECK-RV32-NEXT: j .LBB61_242
; CHECK-RV32-NEXT: .LBB61_755: # %cond.load901
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6566,7 +7002,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_242
+; CHECK-RV32-NEXT: andi a3, a2, 8
+; CHECK-RV32-NEXT: bnez a3, .LBB61_756
+; CHECK-RV32-NEXT: j .LBB61_243
; CHECK-RV32-NEXT: .LBB61_756: # %cond.load905
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6579,7 +7017,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_243
+; CHECK-RV32-NEXT: andi a3, a2, 16
+; CHECK-RV32-NEXT: bnez a3, .LBB61_757
+; CHECK-RV32-NEXT: j .LBB61_244
; CHECK-RV32-NEXT: .LBB61_757: # %cond.load909
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6592,7 +7032,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_244
+; CHECK-RV32-NEXT: andi a3, a2, 32
+; CHECK-RV32-NEXT: bnez a3, .LBB61_758
+; CHECK-RV32-NEXT: j .LBB61_245
; CHECK-RV32-NEXT: .LBB61_758: # %cond.load913
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6605,7 +7047,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_245
+; CHECK-RV32-NEXT: andi a3, a2, 64
+; CHECK-RV32-NEXT: bnez a3, .LBB61_759
+; CHECK-RV32-NEXT: j .LBB61_246
; CHECK-RV32-NEXT: .LBB61_759: # %cond.load917
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6618,7 +7062,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_246
+; CHECK-RV32-NEXT: andi a3, a2, 128
+; CHECK-RV32-NEXT: bnez a3, .LBB61_760
+; CHECK-RV32-NEXT: j .LBB61_247
; CHECK-RV32-NEXT: .LBB61_760: # %cond.load921
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6631,7 +7077,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_247
+; CHECK-RV32-NEXT: andi a3, a2, 256
+; CHECK-RV32-NEXT: bnez a3, .LBB61_761
+; CHECK-RV32-NEXT: j .LBB61_248
; CHECK-RV32-NEXT: .LBB61_761: # %cond.load925
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6644,7 +7092,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_248
+; CHECK-RV32-NEXT: andi a3, a2, 512
+; CHECK-RV32-NEXT: bnez a3, .LBB61_762
+; CHECK-RV32-NEXT: j .LBB61_249
; CHECK-RV32-NEXT: .LBB61_762: # %cond.load929
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6657,7 +7107,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_249
+; CHECK-RV32-NEXT: andi a3, a2, 1024
+; CHECK-RV32-NEXT: bnez a3, .LBB61_763
+; CHECK-RV32-NEXT: j .LBB61_250
; CHECK-RV32-NEXT: .LBB61_763: # %cond.load933
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6670,7 +7122,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_250
+; CHECK-RV32-NEXT: slli a3, a2, 20
+; CHECK-RV32-NEXT: bltz a3, .LBB61_764
+; CHECK-RV32-NEXT: j .LBB61_251
; CHECK-RV32-NEXT: .LBB61_764: # %cond.load937
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6683,7 +7137,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_251
+; CHECK-RV32-NEXT: slli a3, a2, 19
+; CHECK-RV32-NEXT: bltz a3, .LBB61_765
+; CHECK-RV32-NEXT: j .LBB61_252
; CHECK-RV32-NEXT: .LBB61_765: # %cond.load941
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6696,7 +7152,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_252
+; CHECK-RV32-NEXT: slli a3, a2, 18
+; CHECK-RV32-NEXT: bltz a3, .LBB61_766
+; CHECK-RV32-NEXT: j .LBB61_253
; CHECK-RV32-NEXT: .LBB61_766: # %cond.load945
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6709,7 +7167,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_253
+; CHECK-RV32-NEXT: slli a3, a2, 17
+; CHECK-RV32-NEXT: bltz a3, .LBB61_767
+; CHECK-RV32-NEXT: j .LBB61_254
; CHECK-RV32-NEXT: .LBB61_767: # %cond.load949
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6722,7 +7182,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_254
+; CHECK-RV32-NEXT: slli a3, a2, 16
+; CHECK-RV32-NEXT: bltz a3, .LBB61_768
+; CHECK-RV32-NEXT: j .LBB61_255
; CHECK-RV32-NEXT: .LBB61_768: # %cond.load953
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6735,7 +7197,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_255
+; CHECK-RV32-NEXT: slli a3, a2, 15
+; CHECK-RV32-NEXT: bltz a3, .LBB61_769
+; CHECK-RV32-NEXT: j .LBB61_256
; CHECK-RV32-NEXT: .LBB61_769: # %cond.load957
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6748,7 +7212,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_256
+; CHECK-RV32-NEXT: slli a3, a2, 14
+; CHECK-RV32-NEXT: bltz a3, .LBB61_770
+; CHECK-RV32-NEXT: j .LBB61_257
; CHECK-RV32-NEXT: .LBB61_770: # %cond.load961
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6761,7 +7227,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_257
+; CHECK-RV32-NEXT: slli a3, a2, 13
+; CHECK-RV32-NEXT: bltz a3, .LBB61_771
+; CHECK-RV32-NEXT: j .LBB61_258
; CHECK-RV32-NEXT: .LBB61_771: # %cond.load965
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6774,7 +7242,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_258
+; CHECK-RV32-NEXT: slli a3, a2, 12
+; CHECK-RV32-NEXT: bltz a3, .LBB61_772
+; CHECK-RV32-NEXT: j .LBB61_259
; CHECK-RV32-NEXT: .LBB61_772: # %cond.load969
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6787,7 +7257,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_259
+; CHECK-RV32-NEXT: slli a3, a2, 11
+; CHECK-RV32-NEXT: bltz a3, .LBB61_773
+; CHECK-RV32-NEXT: j .LBB61_260
; CHECK-RV32-NEXT: .LBB61_773: # %cond.load973
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6800,7 +7272,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_260
+; CHECK-RV32-NEXT: slli a3, a2, 10
+; CHECK-RV32-NEXT: bltz a3, .LBB61_774
+; CHECK-RV32-NEXT: j .LBB61_261
; CHECK-RV32-NEXT: .LBB61_774: # %cond.load977
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6813,7 +7287,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_261
+; CHECK-RV32-NEXT: slli a3, a2, 9
+; CHECK-RV32-NEXT: bltz a3, .LBB61_775
+; CHECK-RV32-NEXT: j .LBB61_262
; CHECK-RV32-NEXT: .LBB61_775: # %cond.load981
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6826,7 +7302,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_262
+; CHECK-RV32-NEXT: slli a3, a2, 8
+; CHECK-RV32-NEXT: bltz a3, .LBB61_776
+; CHECK-RV32-NEXT: j .LBB61_263
; CHECK-RV32-NEXT: .LBB61_776: # %cond.load985
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6839,7 +7317,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_263
+; CHECK-RV32-NEXT: slli a3, a2, 7
+; CHECK-RV32-NEXT: bltz a3, .LBB61_777
+; CHECK-RV32-NEXT: j .LBB61_264
; CHECK-RV32-NEXT: .LBB61_777: # %cond.load989
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6852,7 +7332,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_264
+; CHECK-RV32-NEXT: slli a3, a2, 6
+; CHECK-RV32-NEXT: bltz a3, .LBB61_778
+; CHECK-RV32-NEXT: j .LBB61_265
; CHECK-RV32-NEXT: .LBB61_778: # %cond.load993
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6865,7 +7347,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_265
+; CHECK-RV32-NEXT: slli a3, a2, 5
+; CHECK-RV32-NEXT: bltz a3, .LBB61_779
+; CHECK-RV32-NEXT: j .LBB61_266
; CHECK-RV32-NEXT: .LBB61_779: # %cond.load997
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6878,7 +7362,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_266
+; CHECK-RV32-NEXT: slli a3, a2, 4
+; CHECK-RV32-NEXT: bltz a3, .LBB61_780
+; CHECK-RV32-NEXT: j .LBB61_267
; CHECK-RV32-NEXT: .LBB61_780: # %cond.load1001
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6891,7 +7377,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_267
+; CHECK-RV32-NEXT: slli a3, a2, 3
+; CHECK-RV32-NEXT: bltz a3, .LBB61_781
+; CHECK-RV32-NEXT: j .LBB61_268
; CHECK-RV32-NEXT: .LBB61_781: # %cond.load1005
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -6904,7 +7392,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_268
+; CHECK-RV32-NEXT: slli a3, a2, 2
+; CHECK-RV32-NEXT: bgez a3, .LBB61_1032
+; CHECK-RV32-NEXT: j .LBB61_269
+; CHECK-RV32-NEXT: .LBB61_1032: # %cond.load1005
+; CHECK-RV32-NEXT: j .LBB61_270
; CHECK-RV32-NEXT: .LBB61_782: # %cond.load1017
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vmv8r.v v16, v8
@@ -6916,7 +7408,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: addi a0, a0, 1
; CHECK-RV32-NEXT: vmv4r.v v16, v8
; CHECK-RV32-NEXT: vmv8r.v v8, v16
-; CHECK-RV32-NEXT: j .LBB61_273
+; CHECK-RV32-NEXT: andi a2, a3, 1
+; CHECK-RV32-NEXT: bnez a2, .LBB61_783
+; CHECK-RV32-NEXT: j .LBB61_274
; CHECK-RV32-NEXT: .LBB61_783: # %cond.load1021
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -6927,7 +7421,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_274
+; CHECK-RV32-NEXT: andi a2, a3, 2
+; CHECK-RV32-NEXT: bnez a2, .LBB61_784
+; CHECK-RV32-NEXT: j .LBB61_275
; CHECK-RV32-NEXT: .LBB61_784: # %cond.load1025
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -6938,7 +7434,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_275
+; CHECK-RV32-NEXT: andi a2, a3, 4
+; CHECK-RV32-NEXT: bnez a2, .LBB61_785
+; CHECK-RV32-NEXT: j .LBB61_276
; CHECK-RV32-NEXT: .LBB61_785: # %cond.load1029
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -6949,7 +7447,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_276
+; CHECK-RV32-NEXT: andi a2, a3, 8
+; CHECK-RV32-NEXT: bnez a2, .LBB61_786
+; CHECK-RV32-NEXT: j .LBB61_277
; CHECK-RV32-NEXT: .LBB61_786: # %cond.load1033
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -6960,7 +7460,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_277
+; CHECK-RV32-NEXT: andi a2, a3, 16
+; CHECK-RV32-NEXT: bnez a2, .LBB61_787
+; CHECK-RV32-NEXT: j .LBB61_278
; CHECK-RV32-NEXT: .LBB61_787: # %cond.load1037
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -6971,7 +7473,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_278
+; CHECK-RV32-NEXT: andi a2, a3, 32
+; CHECK-RV32-NEXT: bnez a2, .LBB61_788
+; CHECK-RV32-NEXT: j .LBB61_279
; CHECK-RV32-NEXT: .LBB61_788: # %cond.load1041
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -6982,7 +7486,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_279
+; CHECK-RV32-NEXT: andi a2, a3, 64
+; CHECK-RV32-NEXT: bnez a2, .LBB61_789
+; CHECK-RV32-NEXT: j .LBB61_280
; CHECK-RV32-NEXT: .LBB61_789: # %cond.load1045
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -6993,7 +7499,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_280
+; CHECK-RV32-NEXT: andi a2, a3, 128
+; CHECK-RV32-NEXT: bnez a2, .LBB61_790
+; CHECK-RV32-NEXT: j .LBB61_281
; CHECK-RV32-NEXT: .LBB61_790: # %cond.load1049
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7004,7 +7512,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_281
+; CHECK-RV32-NEXT: andi a2, a3, 256
+; CHECK-RV32-NEXT: bnez a2, .LBB61_791
+; CHECK-RV32-NEXT: j .LBB61_282
; CHECK-RV32-NEXT: .LBB61_791: # %cond.load1053
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7015,7 +7525,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_282
+; CHECK-RV32-NEXT: andi a2, a3, 512
+; CHECK-RV32-NEXT: bnez a2, .LBB61_792
+; CHECK-RV32-NEXT: j .LBB61_283
; CHECK-RV32-NEXT: .LBB61_792: # %cond.load1057
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7026,7 +7538,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_283
+; CHECK-RV32-NEXT: andi a2, a3, 1024
+; CHECK-RV32-NEXT: bnez a2, .LBB61_793
+; CHECK-RV32-NEXT: j .LBB61_284
; CHECK-RV32-NEXT: .LBB61_793: # %cond.load1061
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7037,7 +7551,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_284
+; CHECK-RV32-NEXT: slli a2, a3, 20
+; CHECK-RV32-NEXT: bltz a2, .LBB61_794
+; CHECK-RV32-NEXT: j .LBB61_285
; CHECK-RV32-NEXT: .LBB61_794: # %cond.load1065
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7048,7 +7564,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_285
+; CHECK-RV32-NEXT: slli a2, a3, 19
+; CHECK-RV32-NEXT: bltz a2, .LBB61_795
+; CHECK-RV32-NEXT: j .LBB61_286
; CHECK-RV32-NEXT: .LBB61_795: # %cond.load1069
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7059,7 +7577,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_286
+; CHECK-RV32-NEXT: slli a2, a3, 18
+; CHECK-RV32-NEXT: bltz a2, .LBB61_796
+; CHECK-RV32-NEXT: j .LBB61_287
; CHECK-RV32-NEXT: .LBB61_796: # %cond.load1073
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7070,7 +7590,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_287
+; CHECK-RV32-NEXT: slli a2, a3, 17
+; CHECK-RV32-NEXT: bltz a2, .LBB61_797
+; CHECK-RV32-NEXT: j .LBB61_288
; CHECK-RV32-NEXT: .LBB61_797: # %cond.load1077
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7081,7 +7603,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_288
+; CHECK-RV32-NEXT: slli a2, a3, 16
+; CHECK-RV32-NEXT: bltz a2, .LBB61_798
+; CHECK-RV32-NEXT: j .LBB61_289
; CHECK-RV32-NEXT: .LBB61_798: # %cond.load1081
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7092,7 +7616,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_289
+; CHECK-RV32-NEXT: slli a2, a3, 15
+; CHECK-RV32-NEXT: bltz a2, .LBB61_799
+; CHECK-RV32-NEXT: j .LBB61_290
; CHECK-RV32-NEXT: .LBB61_799: # %cond.load1085
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7103,7 +7629,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_290
+; CHECK-RV32-NEXT: slli a2, a3, 14
+; CHECK-RV32-NEXT: bltz a2, .LBB61_800
+; CHECK-RV32-NEXT: j .LBB61_291
; CHECK-RV32-NEXT: .LBB61_800: # %cond.load1089
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7114,7 +7642,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_291
+; CHECK-RV32-NEXT: slli a2, a3, 13
+; CHECK-RV32-NEXT: bltz a2, .LBB61_801
+; CHECK-RV32-NEXT: j .LBB61_292
; CHECK-RV32-NEXT: .LBB61_801: # %cond.load1093
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7125,7 +7655,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_292
+; CHECK-RV32-NEXT: slli a2, a3, 12
+; CHECK-RV32-NEXT: bltz a2, .LBB61_802
+; CHECK-RV32-NEXT: j .LBB61_293
; CHECK-RV32-NEXT: .LBB61_802: # %cond.load1097
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7136,7 +7668,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_293
+; CHECK-RV32-NEXT: slli a2, a3, 11
+; CHECK-RV32-NEXT: bltz a2, .LBB61_803
+; CHECK-RV32-NEXT: j .LBB61_294
; CHECK-RV32-NEXT: .LBB61_803: # %cond.load1101
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7147,7 +7681,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_294
+; CHECK-RV32-NEXT: slli a2, a3, 10
+; CHECK-RV32-NEXT: bltz a2, .LBB61_804
+; CHECK-RV32-NEXT: j .LBB61_295
; CHECK-RV32-NEXT: .LBB61_804: # %cond.load1105
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7158,7 +7694,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_295
+; CHECK-RV32-NEXT: slli a2, a3, 9
+; CHECK-RV32-NEXT: bltz a2, .LBB61_805
+; CHECK-RV32-NEXT: j .LBB61_296
; CHECK-RV32-NEXT: .LBB61_805: # %cond.load1109
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7169,8 +7707,10 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_296
-; CHECK-RV32-NEXT: .LBB61_806: # %cond.load1113
+; CHECK-RV32-NEXT: slli a2, a3, 8
+; CHECK-RV32-NEXT: bltz a2, .LBB61_806
+; CHECK-RV32-NEXT: j .LBB61_297
+; CHECK-RV32-NEXT: .LBB61_806: # %cond.load1113
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
@@ -7180,7 +7720,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_297
+; CHECK-RV32-NEXT: slli a2, a3, 7
+; CHECK-RV32-NEXT: bltz a2, .LBB61_807
+; CHECK-RV32-NEXT: j .LBB61_298
; CHECK-RV32-NEXT: .LBB61_807: # %cond.load1117
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7191,7 +7733,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_298
+; CHECK-RV32-NEXT: slli a2, a3, 6
+; CHECK-RV32-NEXT: bltz a2, .LBB61_808
+; CHECK-RV32-NEXT: j .LBB61_299
; CHECK-RV32-NEXT: .LBB61_808: # %cond.load1121
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7202,7 +7746,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_299
+; CHECK-RV32-NEXT: slli a2, a3, 5
+; CHECK-RV32-NEXT: bltz a2, .LBB61_809
+; CHECK-RV32-NEXT: j .LBB61_300
; CHECK-RV32-NEXT: .LBB61_809: # %cond.load1125
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7213,7 +7759,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_300
+; CHECK-RV32-NEXT: slli a2, a3, 4
+; CHECK-RV32-NEXT: bltz a2, .LBB61_810
+; CHECK-RV32-NEXT: j .LBB61_301
; CHECK-RV32-NEXT: .LBB61_810: # %cond.load1129
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7224,7 +7772,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_301
+; CHECK-RV32-NEXT: slli a2, a3, 3
+; CHECK-RV32-NEXT: bltz a2, .LBB61_811
+; CHECK-RV32-NEXT: j .LBB61_302
; CHECK-RV32-NEXT: .LBB61_811: # %cond.load1133
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7235,7 +7785,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_302
+; CHECK-RV32-NEXT: slli a2, a3, 2
+; CHECK-RV32-NEXT: bgez a2, .LBB61_1033
+; CHECK-RV32-NEXT: j .LBB61_303
+; CHECK-RV32-NEXT: .LBB61_1033: # %cond.load1133
+; CHECK-RV32-NEXT: j .LBB61_304
; CHECK-RV32-NEXT: .LBB61_812: # %cond.load1145
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vmv.s.x v16, a3
@@ -7244,7 +7798,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_307
+; CHECK-RV32-NEXT: andi a3, a2, 1
+; CHECK-RV32-NEXT: bnez a3, .LBB61_813
+; CHECK-RV32-NEXT: j .LBB61_308
; CHECK-RV32-NEXT: .LBB61_813: # %cond.load1149
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7255,7 +7811,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_308
+; CHECK-RV32-NEXT: andi a3, a2, 2
+; CHECK-RV32-NEXT: bnez a3, .LBB61_814
+; CHECK-RV32-NEXT: j .LBB61_309
; CHECK-RV32-NEXT: .LBB61_814: # %cond.load1153
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7266,7 +7824,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_309
+; CHECK-RV32-NEXT: andi a3, a2, 4
+; CHECK-RV32-NEXT: bnez a3, .LBB61_815
+; CHECK-RV32-NEXT: j .LBB61_310
; CHECK-RV32-NEXT: .LBB61_815: # %cond.load1157
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7277,7 +7837,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_310
+; CHECK-RV32-NEXT: andi a3, a2, 8
+; CHECK-RV32-NEXT: bnez a3, .LBB61_816
+; CHECK-RV32-NEXT: j .LBB61_311
; CHECK-RV32-NEXT: .LBB61_816: # %cond.load1161
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7288,7 +7850,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_311
+; CHECK-RV32-NEXT: andi a3, a2, 16
+; CHECK-RV32-NEXT: bnez a3, .LBB61_817
+; CHECK-RV32-NEXT: j .LBB61_312
; CHECK-RV32-NEXT: .LBB61_817: # %cond.load1165
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7299,7 +7863,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_312
+; CHECK-RV32-NEXT: andi a3, a2, 32
+; CHECK-RV32-NEXT: bnez a3, .LBB61_818
+; CHECK-RV32-NEXT: j .LBB61_313
; CHECK-RV32-NEXT: .LBB61_818: # %cond.load1169
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7310,7 +7876,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_313
+; CHECK-RV32-NEXT: andi a3, a2, 64
+; CHECK-RV32-NEXT: bnez a3, .LBB61_819
+; CHECK-RV32-NEXT: j .LBB61_314
; CHECK-RV32-NEXT: .LBB61_819: # %cond.load1173
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7321,7 +7889,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_314
+; CHECK-RV32-NEXT: andi a3, a2, 128
+; CHECK-RV32-NEXT: bnez a3, .LBB61_820
+; CHECK-RV32-NEXT: j .LBB61_315
; CHECK-RV32-NEXT: .LBB61_820: # %cond.load1177
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7332,7 +7902,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_315
+; CHECK-RV32-NEXT: andi a3, a2, 256
+; CHECK-RV32-NEXT: bnez a3, .LBB61_821
+; CHECK-RV32-NEXT: j .LBB61_316
; CHECK-RV32-NEXT: .LBB61_821: # %cond.load1181
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7343,7 +7915,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_316
+; CHECK-RV32-NEXT: andi a3, a2, 512
+; CHECK-RV32-NEXT: bnez a3, .LBB61_822
+; CHECK-RV32-NEXT: j .LBB61_317
; CHECK-RV32-NEXT: .LBB61_822: # %cond.load1185
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7354,7 +7928,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_317
+; CHECK-RV32-NEXT: andi a3, a2, 1024
+; CHECK-RV32-NEXT: bnez a3, .LBB61_823
+; CHECK-RV32-NEXT: j .LBB61_318
; CHECK-RV32-NEXT: .LBB61_823: # %cond.load1189
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7365,7 +7941,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_318
+; CHECK-RV32-NEXT: slli a3, a2, 20
+; CHECK-RV32-NEXT: bltz a3, .LBB61_824
+; CHECK-RV32-NEXT: j .LBB61_319
; CHECK-RV32-NEXT: .LBB61_824: # %cond.load1193
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7376,7 +7954,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_319
+; CHECK-RV32-NEXT: slli a3, a2, 19
+; CHECK-RV32-NEXT: bltz a3, .LBB61_825
+; CHECK-RV32-NEXT: j .LBB61_320
; CHECK-RV32-NEXT: .LBB61_825: # %cond.load1197
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7387,7 +7967,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_320
+; CHECK-RV32-NEXT: slli a3, a2, 18
+; CHECK-RV32-NEXT: bltz a3, .LBB61_826
+; CHECK-RV32-NEXT: j .LBB61_321
; CHECK-RV32-NEXT: .LBB61_826: # %cond.load1201
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7398,7 +7980,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_321
+; CHECK-RV32-NEXT: slli a3, a2, 17
+; CHECK-RV32-NEXT: bltz a3, .LBB61_827
+; CHECK-RV32-NEXT: j .LBB61_322
; CHECK-RV32-NEXT: .LBB61_827: # %cond.load1205
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7409,7 +7993,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_322
+; CHECK-RV32-NEXT: slli a3, a2, 16
+; CHECK-RV32-NEXT: bltz a3, .LBB61_828
+; CHECK-RV32-NEXT: j .LBB61_323
; CHECK-RV32-NEXT: .LBB61_828: # %cond.load1209
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7420,7 +8006,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_323
+; CHECK-RV32-NEXT: slli a3, a2, 15
+; CHECK-RV32-NEXT: bltz a3, .LBB61_829
+; CHECK-RV32-NEXT: j .LBB61_324
; CHECK-RV32-NEXT: .LBB61_829: # %cond.load1213
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7431,7 +8019,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_324
+; CHECK-RV32-NEXT: slli a3, a2, 14
+; CHECK-RV32-NEXT: bltz a3, .LBB61_830
+; CHECK-RV32-NEXT: j .LBB61_325
; CHECK-RV32-NEXT: .LBB61_830: # %cond.load1217
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7442,7 +8032,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_325
+; CHECK-RV32-NEXT: slli a3, a2, 13
+; CHECK-RV32-NEXT: bltz a3, .LBB61_831
+; CHECK-RV32-NEXT: j .LBB61_326
; CHECK-RV32-NEXT: .LBB61_831: # %cond.load1221
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7453,7 +8045,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_326
+; CHECK-RV32-NEXT: slli a3, a2, 12
+; CHECK-RV32-NEXT: bltz a3, .LBB61_832
+; CHECK-RV32-NEXT: j .LBB61_327
; CHECK-RV32-NEXT: .LBB61_832: # %cond.load1225
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7464,7 +8058,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_327
+; CHECK-RV32-NEXT: slli a3, a2, 11
+; CHECK-RV32-NEXT: bltz a3, .LBB61_833
+; CHECK-RV32-NEXT: j .LBB61_328
; CHECK-RV32-NEXT: .LBB61_833: # %cond.load1229
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7475,7 +8071,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_328
+; CHECK-RV32-NEXT: slli a3, a2, 10
+; CHECK-RV32-NEXT: bltz a3, .LBB61_834
+; CHECK-RV32-NEXT: j .LBB61_329
; CHECK-RV32-NEXT: .LBB61_834: # %cond.load1233
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7486,7 +8084,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_329
+; CHECK-RV32-NEXT: slli a3, a2, 9
+; CHECK-RV32-NEXT: bltz a3, .LBB61_835
+; CHECK-RV32-NEXT: j .LBB61_330
; CHECK-RV32-NEXT: .LBB61_835: # %cond.load1237
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7497,7 +8097,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_330
+; CHECK-RV32-NEXT: slli a3, a2, 8
+; CHECK-RV32-NEXT: bltz a3, .LBB61_836
+; CHECK-RV32-NEXT: j .LBB61_331
; CHECK-RV32-NEXT: .LBB61_836: # %cond.load1241
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7508,7 +8110,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_331
+; CHECK-RV32-NEXT: slli a3, a2, 7
+; CHECK-RV32-NEXT: bltz a3, .LBB61_837
+; CHECK-RV32-NEXT: j .LBB61_332
; CHECK-RV32-NEXT: .LBB61_837: # %cond.load1245
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7519,7 +8123,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_332
+; CHECK-RV32-NEXT: slli a3, a2, 6
+; CHECK-RV32-NEXT: bltz a3, .LBB61_838
+; CHECK-RV32-NEXT: j .LBB61_333
; CHECK-RV32-NEXT: .LBB61_838: # %cond.load1249
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7530,7 +8136,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_333
+; CHECK-RV32-NEXT: slli a3, a2, 5
+; CHECK-RV32-NEXT: bltz a3, .LBB61_839
+; CHECK-RV32-NEXT: j .LBB61_334
; CHECK-RV32-NEXT: .LBB61_839: # %cond.load1253
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7541,7 +8149,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_334
+; CHECK-RV32-NEXT: slli a3, a2, 4
+; CHECK-RV32-NEXT: bltz a3, .LBB61_840
+; CHECK-RV32-NEXT: j .LBB61_335
; CHECK-RV32-NEXT: .LBB61_840: # %cond.load1257
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7552,7 +8162,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_335
+; CHECK-RV32-NEXT: slli a3, a2, 3
+; CHECK-RV32-NEXT: bltz a3, .LBB61_841
+; CHECK-RV32-NEXT: j .LBB61_336
; CHECK-RV32-NEXT: .LBB61_841: # %cond.load1261
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7563,7 +8175,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_336
+; CHECK-RV32-NEXT: slli a3, a2, 2
+; CHECK-RV32-NEXT: bgez a3, .LBB61_1034
+; CHECK-RV32-NEXT: j .LBB61_337
+; CHECK-RV32-NEXT: .LBB61_1034: # %cond.load1261
+; CHECK-RV32-NEXT: j .LBB61_338
; CHECK-RV32-NEXT: .LBB61_842: # %cond.load1273
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vmv.s.x v16, a2
@@ -7572,7 +8188,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_341
+; CHECK-RV32-NEXT: andi a2, a3, 1
+; CHECK-RV32-NEXT: bnez a2, .LBB61_843
+; CHECK-RV32-NEXT: j .LBB61_342
; CHECK-RV32-NEXT: .LBB61_843: # %cond.load1277
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7583,7 +8201,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_342
+; CHECK-RV32-NEXT: andi a2, a3, 2
+; CHECK-RV32-NEXT: bnez a2, .LBB61_844
+; CHECK-RV32-NEXT: j .LBB61_343
; CHECK-RV32-NEXT: .LBB61_844: # %cond.load1281
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7594,7 +8214,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_343
+; CHECK-RV32-NEXT: andi a2, a3, 4
+; CHECK-RV32-NEXT: bnez a2, .LBB61_845
+; CHECK-RV32-NEXT: j .LBB61_344
; CHECK-RV32-NEXT: .LBB61_845: # %cond.load1285
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7605,7 +8227,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_344
+; CHECK-RV32-NEXT: andi a2, a3, 8
+; CHECK-RV32-NEXT: bnez a2, .LBB61_846
+; CHECK-RV32-NEXT: j .LBB61_345
; CHECK-RV32-NEXT: .LBB61_846: # %cond.load1289
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7616,7 +8240,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_345
+; CHECK-RV32-NEXT: andi a2, a3, 16
+; CHECK-RV32-NEXT: bnez a2, .LBB61_847
+; CHECK-RV32-NEXT: j .LBB61_346
; CHECK-RV32-NEXT: .LBB61_847: # %cond.load1293
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7627,7 +8253,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_346
+; CHECK-RV32-NEXT: andi a2, a3, 32
+; CHECK-RV32-NEXT: bnez a2, .LBB61_848
+; CHECK-RV32-NEXT: j .LBB61_347
; CHECK-RV32-NEXT: .LBB61_848: # %cond.load1297
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7638,7 +8266,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_347
+; CHECK-RV32-NEXT: andi a2, a3, 64
+; CHECK-RV32-NEXT: bnez a2, .LBB61_849
+; CHECK-RV32-NEXT: j .LBB61_348
; CHECK-RV32-NEXT: .LBB61_849: # %cond.load1301
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7649,7 +8279,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_348
+; CHECK-RV32-NEXT: andi a2, a3, 128
+; CHECK-RV32-NEXT: bnez a2, .LBB61_850
+; CHECK-RV32-NEXT: j .LBB61_349
; CHECK-RV32-NEXT: .LBB61_850: # %cond.load1305
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7660,7 +8292,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_349
+; CHECK-RV32-NEXT: andi a2, a3, 256
+; CHECK-RV32-NEXT: bnez a2, .LBB61_851
+; CHECK-RV32-NEXT: j .LBB61_350
; CHECK-RV32-NEXT: .LBB61_851: # %cond.load1309
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7671,7 +8305,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_350
+; CHECK-RV32-NEXT: andi a2, a3, 512
+; CHECK-RV32-NEXT: bnez a2, .LBB61_852
+; CHECK-RV32-NEXT: j .LBB61_351
; CHECK-RV32-NEXT: .LBB61_852: # %cond.load1313
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7682,7 +8318,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_351
+; CHECK-RV32-NEXT: andi a2, a3, 1024
+; CHECK-RV32-NEXT: bnez a2, .LBB61_853
+; CHECK-RV32-NEXT: j .LBB61_352
; CHECK-RV32-NEXT: .LBB61_853: # %cond.load1317
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7693,7 +8331,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_352
+; CHECK-RV32-NEXT: slli a2, a3, 20
+; CHECK-RV32-NEXT: bltz a2, .LBB61_854
+; CHECK-RV32-NEXT: j .LBB61_353
; CHECK-RV32-NEXT: .LBB61_854: # %cond.load1321
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7704,7 +8344,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_353
+; CHECK-RV32-NEXT: slli a2, a3, 19
+; CHECK-RV32-NEXT: bltz a2, .LBB61_855
+; CHECK-RV32-NEXT: j .LBB61_354
; CHECK-RV32-NEXT: .LBB61_855: # %cond.load1325
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7715,7 +8357,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_354
+; CHECK-RV32-NEXT: slli a2, a3, 18
+; CHECK-RV32-NEXT: bltz a2, .LBB61_856
+; CHECK-RV32-NEXT: j .LBB61_355
; CHECK-RV32-NEXT: .LBB61_856: # %cond.load1329
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7726,7 +8370,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_355
+; CHECK-RV32-NEXT: slli a2, a3, 17
+; CHECK-RV32-NEXT: bltz a2, .LBB61_857
+; CHECK-RV32-NEXT: j .LBB61_356
; CHECK-RV32-NEXT: .LBB61_857: # %cond.load1333
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7737,7 +8383,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_356
+; CHECK-RV32-NEXT: slli a2, a3, 16
+; CHECK-RV32-NEXT: bltz a2, .LBB61_858
+; CHECK-RV32-NEXT: j .LBB61_357
; CHECK-RV32-NEXT: .LBB61_858: # %cond.load1337
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7748,7 +8396,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_357
+; CHECK-RV32-NEXT: slli a2, a3, 15
+; CHECK-RV32-NEXT: bltz a2, .LBB61_859
+; CHECK-RV32-NEXT: j .LBB61_358
; CHECK-RV32-NEXT: .LBB61_859: # %cond.load1341
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7759,7 +8409,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_358
+; CHECK-RV32-NEXT: slli a2, a3, 14
+; CHECK-RV32-NEXT: bltz a2, .LBB61_860
+; CHECK-RV32-NEXT: j .LBB61_359
; CHECK-RV32-NEXT: .LBB61_860: # %cond.load1345
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7770,7 +8422,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_359
+; CHECK-RV32-NEXT: slli a2, a3, 13
+; CHECK-RV32-NEXT: bltz a2, .LBB61_861
+; CHECK-RV32-NEXT: j .LBB61_360
; CHECK-RV32-NEXT: .LBB61_861: # %cond.load1349
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7781,7 +8435,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_360
+; CHECK-RV32-NEXT: slli a2, a3, 12
+; CHECK-RV32-NEXT: bltz a2, .LBB61_862
+; CHECK-RV32-NEXT: j .LBB61_361
; CHECK-RV32-NEXT: .LBB61_862: # %cond.load1353
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7792,7 +8448,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_361
+; CHECK-RV32-NEXT: slli a2, a3, 11
+; CHECK-RV32-NEXT: bltz a2, .LBB61_863
+; CHECK-RV32-NEXT: j .LBB61_362
; CHECK-RV32-NEXT: .LBB61_863: # %cond.load1357
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7803,7 +8461,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_362
+; CHECK-RV32-NEXT: slli a2, a3, 10
+; CHECK-RV32-NEXT: bltz a2, .LBB61_864
+; CHECK-RV32-NEXT: j .LBB61_363
; CHECK-RV32-NEXT: .LBB61_864: # %cond.load1361
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7814,7 +8474,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_363
+; CHECK-RV32-NEXT: slli a2, a3, 9
+; CHECK-RV32-NEXT: bltz a2, .LBB61_865
+; CHECK-RV32-NEXT: j .LBB61_364
; CHECK-RV32-NEXT: .LBB61_865: # %cond.load1365
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7825,7 +8487,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_364
+; CHECK-RV32-NEXT: slli a2, a3, 8
+; CHECK-RV32-NEXT: bltz a2, .LBB61_866
+; CHECK-RV32-NEXT: j .LBB61_365
; CHECK-RV32-NEXT: .LBB61_866: # %cond.load1369
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7836,7 +8500,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_365
+; CHECK-RV32-NEXT: slli a2, a3, 7
+; CHECK-RV32-NEXT: bltz a2, .LBB61_867
+; CHECK-RV32-NEXT: j .LBB61_366
; CHECK-RV32-NEXT: .LBB61_867: # %cond.load1373
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7847,7 +8513,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_366
+; CHECK-RV32-NEXT: slli a2, a3, 6
+; CHECK-RV32-NEXT: bltz a2, .LBB61_868
+; CHECK-RV32-NEXT: j .LBB61_367
; CHECK-RV32-NEXT: .LBB61_868: # %cond.load1377
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7858,7 +8526,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_367
+; CHECK-RV32-NEXT: slli a2, a3, 5
+; CHECK-RV32-NEXT: bltz a2, .LBB61_869
+; CHECK-RV32-NEXT: j .LBB61_368
; CHECK-RV32-NEXT: .LBB61_869: # %cond.load1381
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7869,7 +8539,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_368
+; CHECK-RV32-NEXT: slli a2, a3, 4
+; CHECK-RV32-NEXT: bltz a2, .LBB61_870
+; CHECK-RV32-NEXT: j .LBB61_369
; CHECK-RV32-NEXT: .LBB61_870: # %cond.load1385
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7880,7 +8552,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_369
+; CHECK-RV32-NEXT: slli a2, a3, 3
+; CHECK-RV32-NEXT: bltz a2, .LBB61_871
+; CHECK-RV32-NEXT: j .LBB61_370
; CHECK-RV32-NEXT: .LBB61_871: # %cond.load1389
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7891,7 +8565,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_370
+; CHECK-RV32-NEXT: slli a2, a3, 2
+; CHECK-RV32-NEXT: bgez a2, .LBB61_1035
+; CHECK-RV32-NEXT: j .LBB61_371
+; CHECK-RV32-NEXT: .LBB61_1035: # %cond.load1389
+; CHECK-RV32-NEXT: j .LBB61_372
; CHECK-RV32-NEXT: .LBB61_872: # %cond.load1401
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vmv.s.x v16, a3
@@ -7900,7 +8578,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_375
+; CHECK-RV32-NEXT: andi a3, a2, 1
+; CHECK-RV32-NEXT: bnez a3, .LBB61_873
+; CHECK-RV32-NEXT: j .LBB61_376
; CHECK-RV32-NEXT: .LBB61_873: # %cond.load1405
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7911,7 +8591,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_376
+; CHECK-RV32-NEXT: andi a3, a2, 2
+; CHECK-RV32-NEXT: bnez a3, .LBB61_874
+; CHECK-RV32-NEXT: j .LBB61_377
; CHECK-RV32-NEXT: .LBB61_874: # %cond.load1409
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7922,7 +8604,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_377
+; CHECK-RV32-NEXT: andi a3, a2, 4
+; CHECK-RV32-NEXT: bnez a3, .LBB61_875
+; CHECK-RV32-NEXT: j .LBB61_378
; CHECK-RV32-NEXT: .LBB61_875: # %cond.load1413
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7933,7 +8617,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_378
+; CHECK-RV32-NEXT: andi a3, a2, 8
+; CHECK-RV32-NEXT: bnez a3, .LBB61_876
+; CHECK-RV32-NEXT: j .LBB61_379
; CHECK-RV32-NEXT: .LBB61_876: # %cond.load1417
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7944,7 +8630,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_379
+; CHECK-RV32-NEXT: andi a3, a2, 16
+; CHECK-RV32-NEXT: bnez a3, .LBB61_877
+; CHECK-RV32-NEXT: j .LBB61_380
; CHECK-RV32-NEXT: .LBB61_877: # %cond.load1421
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7955,7 +8643,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_380
+; CHECK-RV32-NEXT: andi a3, a2, 32
+; CHECK-RV32-NEXT: bnez a3, .LBB61_878
+; CHECK-RV32-NEXT: j .LBB61_381
; CHECK-RV32-NEXT: .LBB61_878: # %cond.load1425
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7966,7 +8656,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_381
+; CHECK-RV32-NEXT: andi a3, a2, 64
+; CHECK-RV32-NEXT: bnez a3, .LBB61_879
+; CHECK-RV32-NEXT: j .LBB61_382
; CHECK-RV32-NEXT: .LBB61_879: # %cond.load1429
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7977,7 +8669,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_382
+; CHECK-RV32-NEXT: andi a3, a2, 128
+; CHECK-RV32-NEXT: bnez a3, .LBB61_880
+; CHECK-RV32-NEXT: j .LBB61_383
; CHECK-RV32-NEXT: .LBB61_880: # %cond.load1433
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7988,7 +8682,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_383
+; CHECK-RV32-NEXT: andi a3, a2, 256
+; CHECK-RV32-NEXT: bnez a3, .LBB61_881
+; CHECK-RV32-NEXT: j .LBB61_384
; CHECK-RV32-NEXT: .LBB61_881: # %cond.load1437
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -7999,7 +8695,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_384
+; CHECK-RV32-NEXT: andi a3, a2, 512
+; CHECK-RV32-NEXT: bnez a3, .LBB61_882
+; CHECK-RV32-NEXT: j .LBB61_385
; CHECK-RV32-NEXT: .LBB61_882: # %cond.load1441
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8010,7 +8708,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_385
+; CHECK-RV32-NEXT: andi a3, a2, 1024
+; CHECK-RV32-NEXT: bnez a3, .LBB61_883
+; CHECK-RV32-NEXT: j .LBB61_386
; CHECK-RV32-NEXT: .LBB61_883: # %cond.load1445
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8021,7 +8721,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_386
+; CHECK-RV32-NEXT: slli a3, a2, 20
+; CHECK-RV32-NEXT: bltz a3, .LBB61_884
+; CHECK-RV32-NEXT: j .LBB61_387
; CHECK-RV32-NEXT: .LBB61_884: # %cond.load1449
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8032,7 +8734,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_387
+; CHECK-RV32-NEXT: slli a3, a2, 19
+; CHECK-RV32-NEXT: bltz a3, .LBB61_885
+; CHECK-RV32-NEXT: j .LBB61_388
; CHECK-RV32-NEXT: .LBB61_885: # %cond.load1453
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8043,7 +8747,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_388
+; CHECK-RV32-NEXT: slli a3, a2, 18
+; CHECK-RV32-NEXT: bltz a3, .LBB61_886
+; CHECK-RV32-NEXT: j .LBB61_389
; CHECK-RV32-NEXT: .LBB61_886: # %cond.load1457
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8054,7 +8760,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_389
+; CHECK-RV32-NEXT: slli a3, a2, 17
+; CHECK-RV32-NEXT: bltz a3, .LBB61_887
+; CHECK-RV32-NEXT: j .LBB61_390
; CHECK-RV32-NEXT: .LBB61_887: # %cond.load1461
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8065,7 +8773,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_390
+; CHECK-RV32-NEXT: slli a3, a2, 16
+; CHECK-RV32-NEXT: bltz a3, .LBB61_888
+; CHECK-RV32-NEXT: j .LBB61_391
; CHECK-RV32-NEXT: .LBB61_888: # %cond.load1465
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8076,7 +8786,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_391
+; CHECK-RV32-NEXT: slli a3, a2, 15
+; CHECK-RV32-NEXT: bltz a3, .LBB61_889
+; CHECK-RV32-NEXT: j .LBB61_392
; CHECK-RV32-NEXT: .LBB61_889: # %cond.load1469
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8087,7 +8799,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_392
+; CHECK-RV32-NEXT: slli a3, a2, 14
+; CHECK-RV32-NEXT: bltz a3, .LBB61_890
+; CHECK-RV32-NEXT: j .LBB61_393
; CHECK-RV32-NEXT: .LBB61_890: # %cond.load1473
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8098,7 +8812,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_393
+; CHECK-RV32-NEXT: slli a3, a2, 13
+; CHECK-RV32-NEXT: bltz a3, .LBB61_891
+; CHECK-RV32-NEXT: j .LBB61_394
; CHECK-RV32-NEXT: .LBB61_891: # %cond.load1477
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8109,7 +8825,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_394
+; CHECK-RV32-NEXT: slli a3, a2, 12
+; CHECK-RV32-NEXT: bltz a3, .LBB61_892
+; CHECK-RV32-NEXT: j .LBB61_395
; CHECK-RV32-NEXT: .LBB61_892: # %cond.load1481
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8120,7 +8838,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_395
+; CHECK-RV32-NEXT: slli a3, a2, 11
+; CHECK-RV32-NEXT: bltz a3, .LBB61_893
+; CHECK-RV32-NEXT: j .LBB61_396
; CHECK-RV32-NEXT: .LBB61_893: # %cond.load1485
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8131,7 +8851,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_396
+; CHECK-RV32-NEXT: slli a3, a2, 10
+; CHECK-RV32-NEXT: bltz a3, .LBB61_894
+; CHECK-RV32-NEXT: j .LBB61_397
; CHECK-RV32-NEXT: .LBB61_894: # %cond.load1489
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8142,7 +8864,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_397
+; CHECK-RV32-NEXT: slli a3, a2, 9
+; CHECK-RV32-NEXT: bltz a3, .LBB61_895
+; CHECK-RV32-NEXT: j .LBB61_398
; CHECK-RV32-NEXT: .LBB61_895: # %cond.load1493
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8153,7 +8877,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_398
+; CHECK-RV32-NEXT: slli a3, a2, 8
+; CHECK-RV32-NEXT: bltz a3, .LBB61_896
+; CHECK-RV32-NEXT: j .LBB61_399
; CHECK-RV32-NEXT: .LBB61_896: # %cond.load1497
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8164,7 +8890,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_399
+; CHECK-RV32-NEXT: slli a3, a2, 7
+; CHECK-RV32-NEXT: bltz a3, .LBB61_897
+; CHECK-RV32-NEXT: j .LBB61_400
; CHECK-RV32-NEXT: .LBB61_897: # %cond.load1501
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8175,7 +8903,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_400
+; CHECK-RV32-NEXT: slli a3, a2, 6
+; CHECK-RV32-NEXT: bltz a3, .LBB61_898
+; CHECK-RV32-NEXT: j .LBB61_401
; CHECK-RV32-NEXT: .LBB61_898: # %cond.load1505
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8186,7 +8916,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_401
+; CHECK-RV32-NEXT: slli a3, a2, 5
+; CHECK-RV32-NEXT: bltz a3, .LBB61_899
+; CHECK-RV32-NEXT: j .LBB61_402
; CHECK-RV32-NEXT: .LBB61_899: # %cond.load1509
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8197,7 +8929,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_402
+; CHECK-RV32-NEXT: slli a3, a2, 4
+; CHECK-RV32-NEXT: bltz a3, .LBB61_900
+; CHECK-RV32-NEXT: j .LBB61_403
; CHECK-RV32-NEXT: .LBB61_900: # %cond.load1513
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8208,7 +8942,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_403
+; CHECK-RV32-NEXT: slli a3, a2, 3
+; CHECK-RV32-NEXT: bltz a3, .LBB61_901
+; CHECK-RV32-NEXT: j .LBB61_404
; CHECK-RV32-NEXT: .LBB61_901: # %cond.load1517
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8219,7 +8955,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_404
+; CHECK-RV32-NEXT: slli a3, a2, 2
+; CHECK-RV32-NEXT: bgez a3, .LBB61_1036
+; CHECK-RV32-NEXT: j .LBB61_405
+; CHECK-RV32-NEXT: .LBB61_1036: # %cond.load1517
+; CHECK-RV32-NEXT: j .LBB61_406
; CHECK-RV32-NEXT: .LBB61_902: # %cond.load1529
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vmv.s.x v16, a2
@@ -8228,7 +8968,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_409
+; CHECK-RV32-NEXT: andi a2, a3, 1
+; CHECK-RV32-NEXT: bnez a2, .LBB61_903
+; CHECK-RV32-NEXT: j .LBB61_410
; CHECK-RV32-NEXT: .LBB61_903: # %cond.load1533
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8239,7 +8981,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_410
+; CHECK-RV32-NEXT: andi a2, a3, 2
+; CHECK-RV32-NEXT: bnez a2, .LBB61_904
+; CHECK-RV32-NEXT: j .LBB61_411
; CHECK-RV32-NEXT: .LBB61_904: # %cond.load1537
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8250,7 +8994,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_411
+; CHECK-RV32-NEXT: andi a2, a3, 4
+; CHECK-RV32-NEXT: bnez a2, .LBB61_905
+; CHECK-RV32-NEXT: j .LBB61_412
; CHECK-RV32-NEXT: .LBB61_905: # %cond.load1541
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8261,7 +9007,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_412
+; CHECK-RV32-NEXT: andi a2, a3, 8
+; CHECK-RV32-NEXT: bnez a2, .LBB61_906
+; CHECK-RV32-NEXT: j .LBB61_413
; CHECK-RV32-NEXT: .LBB61_906: # %cond.load1545
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8272,7 +9020,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_413
+; CHECK-RV32-NEXT: andi a2, a3, 16
+; CHECK-RV32-NEXT: bnez a2, .LBB61_907
+; CHECK-RV32-NEXT: j .LBB61_414
; CHECK-RV32-NEXT: .LBB61_907: # %cond.load1549
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8283,7 +9033,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_414
+; CHECK-RV32-NEXT: andi a2, a3, 32
+; CHECK-RV32-NEXT: bnez a2, .LBB61_908
+; CHECK-RV32-NEXT: j .LBB61_415
; CHECK-RV32-NEXT: .LBB61_908: # %cond.load1553
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8294,7 +9046,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_415
+; CHECK-RV32-NEXT: andi a2, a3, 64
+; CHECK-RV32-NEXT: bnez a2, .LBB61_909
+; CHECK-RV32-NEXT: j .LBB61_416
; CHECK-RV32-NEXT: .LBB61_909: # %cond.load1557
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8305,7 +9059,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_416
+; CHECK-RV32-NEXT: andi a2, a3, 128
+; CHECK-RV32-NEXT: bnez a2, .LBB61_910
+; CHECK-RV32-NEXT: j .LBB61_417
; CHECK-RV32-NEXT: .LBB61_910: # %cond.load1561
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8316,7 +9072,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_417
+; CHECK-RV32-NEXT: andi a2, a3, 256
+; CHECK-RV32-NEXT: bnez a2, .LBB61_911
+; CHECK-RV32-NEXT: j .LBB61_418
; CHECK-RV32-NEXT: .LBB61_911: # %cond.load1565
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8327,7 +9085,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_418
+; CHECK-RV32-NEXT: andi a2, a3, 512
+; CHECK-RV32-NEXT: bnez a2, .LBB61_912
+; CHECK-RV32-NEXT: j .LBB61_419
; CHECK-RV32-NEXT: .LBB61_912: # %cond.load1569
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8338,7 +9098,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_419
+; CHECK-RV32-NEXT: andi a2, a3, 1024
+; CHECK-RV32-NEXT: bnez a2, .LBB61_913
+; CHECK-RV32-NEXT: j .LBB61_420
; CHECK-RV32-NEXT: .LBB61_913: # %cond.load1573
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8349,7 +9111,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_420
+; CHECK-RV32-NEXT: slli a2, a3, 20
+; CHECK-RV32-NEXT: bltz a2, .LBB61_914
+; CHECK-RV32-NEXT: j .LBB61_421
; CHECK-RV32-NEXT: .LBB61_914: # %cond.load1577
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8360,7 +9124,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_421
+; CHECK-RV32-NEXT: slli a2, a3, 19
+; CHECK-RV32-NEXT: bltz a2, .LBB61_915
+; CHECK-RV32-NEXT: j .LBB61_422
; CHECK-RV32-NEXT: .LBB61_915: # %cond.load1581
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8371,7 +9137,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_422
+; CHECK-RV32-NEXT: slli a2, a3, 18
+; CHECK-RV32-NEXT: bltz a2, .LBB61_916
+; CHECK-RV32-NEXT: j .LBB61_423
; CHECK-RV32-NEXT: .LBB61_916: # %cond.load1585
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8382,7 +9150,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_423
+; CHECK-RV32-NEXT: slli a2, a3, 17
+; CHECK-RV32-NEXT: bltz a2, .LBB61_917
+; CHECK-RV32-NEXT: j .LBB61_424
; CHECK-RV32-NEXT: .LBB61_917: # %cond.load1589
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8393,7 +9163,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_424
+; CHECK-RV32-NEXT: slli a2, a3, 16
+; CHECK-RV32-NEXT: bltz a2, .LBB61_918
+; CHECK-RV32-NEXT: j .LBB61_425
; CHECK-RV32-NEXT: .LBB61_918: # %cond.load1593
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8404,7 +9176,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_425
+; CHECK-RV32-NEXT: slli a2, a3, 15
+; CHECK-RV32-NEXT: bltz a2, .LBB61_919
+; CHECK-RV32-NEXT: j .LBB61_426
; CHECK-RV32-NEXT: .LBB61_919: # %cond.load1597
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8415,7 +9189,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_426
+; CHECK-RV32-NEXT: slli a2, a3, 14
+; CHECK-RV32-NEXT: bltz a2, .LBB61_920
+; CHECK-RV32-NEXT: j .LBB61_427
; CHECK-RV32-NEXT: .LBB61_920: # %cond.load1601
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8426,7 +9202,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_427
+; CHECK-RV32-NEXT: slli a2, a3, 13
+; CHECK-RV32-NEXT: bltz a2, .LBB61_921
+; CHECK-RV32-NEXT: j .LBB61_428
; CHECK-RV32-NEXT: .LBB61_921: # %cond.load1605
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8437,7 +9215,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_428
+; CHECK-RV32-NEXT: slli a2, a3, 12
+; CHECK-RV32-NEXT: bltz a2, .LBB61_922
+; CHECK-RV32-NEXT: j .LBB61_429
; CHECK-RV32-NEXT: .LBB61_922: # %cond.load1609
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8448,7 +9228,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_429
+; CHECK-RV32-NEXT: slli a2, a3, 11
+; CHECK-RV32-NEXT: bltz a2, .LBB61_923
+; CHECK-RV32-NEXT: j .LBB61_430
; CHECK-RV32-NEXT: .LBB61_923: # %cond.load1613
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8459,7 +9241,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_430
+; CHECK-RV32-NEXT: slli a2, a3, 10
+; CHECK-RV32-NEXT: bltz a2, .LBB61_924
+; CHECK-RV32-NEXT: j .LBB61_431
; CHECK-RV32-NEXT: .LBB61_924: # %cond.load1617
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8470,7 +9254,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_431
+; CHECK-RV32-NEXT: slli a2, a3, 9
+; CHECK-RV32-NEXT: bltz a2, .LBB61_925
+; CHECK-RV32-NEXT: j .LBB61_432
; CHECK-RV32-NEXT: .LBB61_925: # %cond.load1621
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8481,7 +9267,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_432
+; CHECK-RV32-NEXT: slli a2, a3, 8
+; CHECK-RV32-NEXT: bltz a2, .LBB61_926
+; CHECK-RV32-NEXT: j .LBB61_433
; CHECK-RV32-NEXT: .LBB61_926: # %cond.load1625
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8492,7 +9280,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_433
+; CHECK-RV32-NEXT: slli a2, a3, 7
+; CHECK-RV32-NEXT: bltz a2, .LBB61_927
+; CHECK-RV32-NEXT: j .LBB61_434
; CHECK-RV32-NEXT: .LBB61_927: # %cond.load1629
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8503,7 +9293,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_434
+; CHECK-RV32-NEXT: slli a2, a3, 6
+; CHECK-RV32-NEXT: bltz a2, .LBB61_928
+; CHECK-RV32-NEXT: j .LBB61_435
; CHECK-RV32-NEXT: .LBB61_928: # %cond.load1633
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8514,7 +9306,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_435
+; CHECK-RV32-NEXT: slli a2, a3, 5
+; CHECK-RV32-NEXT: bltz a2, .LBB61_929
+; CHECK-RV32-NEXT: j .LBB61_436
; CHECK-RV32-NEXT: .LBB61_929: # %cond.load1637
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8525,7 +9319,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_436
+; CHECK-RV32-NEXT: slli a2, a3, 4
+; CHECK-RV32-NEXT: bltz a2, .LBB61_930
+; CHECK-RV32-NEXT: j .LBB61_437
; CHECK-RV32-NEXT: .LBB61_930: # %cond.load1641
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8536,7 +9332,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_437
+; CHECK-RV32-NEXT: slli a2, a3, 3
+; CHECK-RV32-NEXT: bltz a2, .LBB61_931
+; CHECK-RV32-NEXT: j .LBB61_438
; CHECK-RV32-NEXT: .LBB61_931: # %cond.load1645
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8547,7 +9345,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_438
+; CHECK-RV32-NEXT: slli a2, a3, 2
+; CHECK-RV32-NEXT: bgez a2, .LBB61_1037
+; CHECK-RV32-NEXT: j .LBB61_439
+; CHECK-RV32-NEXT: .LBB61_1037: # %cond.load1645
+; CHECK-RV32-NEXT: j .LBB61_440
; CHECK-RV32-NEXT: .LBB61_932: # %cond.load1657
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: vmv.s.x v16, a3
@@ -8556,7 +9358,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_443
+; CHECK-RV32-NEXT: andi a3, a2, 1
+; CHECK-RV32-NEXT: bnez a3, .LBB61_933
+; CHECK-RV32-NEXT: j .LBB61_444
; CHECK-RV32-NEXT: .LBB61_933: # %cond.load1661
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8567,7 +9371,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_444
+; CHECK-RV32-NEXT: andi a3, a2, 2
+; CHECK-RV32-NEXT: bnez a3, .LBB61_934
+; CHECK-RV32-NEXT: j .LBB61_445
; CHECK-RV32-NEXT: .LBB61_934: # %cond.load1665
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8578,7 +9384,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_445
+; CHECK-RV32-NEXT: andi a3, a2, 4
+; CHECK-RV32-NEXT: bnez a3, .LBB61_935
+; CHECK-RV32-NEXT: j .LBB61_446
; CHECK-RV32-NEXT: .LBB61_935: # %cond.load1669
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8589,7 +9397,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_446
+; CHECK-RV32-NEXT: andi a3, a2, 8
+; CHECK-RV32-NEXT: bnez a3, .LBB61_936
+; CHECK-RV32-NEXT: j .LBB61_447
; CHECK-RV32-NEXT: .LBB61_936: # %cond.load1673
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8600,7 +9410,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_447
+; CHECK-RV32-NEXT: andi a3, a2, 16
+; CHECK-RV32-NEXT: bnez a3, .LBB61_937
+; CHECK-RV32-NEXT: j .LBB61_448
; CHECK-RV32-NEXT: .LBB61_937: # %cond.load1677
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8611,7 +9423,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_448
+; CHECK-RV32-NEXT: andi a3, a2, 32
+; CHECK-RV32-NEXT: bnez a3, .LBB61_938
+; CHECK-RV32-NEXT: j .LBB61_449
; CHECK-RV32-NEXT: .LBB61_938: # %cond.load1681
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8622,7 +9436,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_449
+; CHECK-RV32-NEXT: andi a3, a2, 64
+; CHECK-RV32-NEXT: bnez a3, .LBB61_939
+; CHECK-RV32-NEXT: j .LBB61_450
; CHECK-RV32-NEXT: .LBB61_939: # %cond.load1685
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8633,7 +9449,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_450
+; CHECK-RV32-NEXT: andi a3, a2, 128
+; CHECK-RV32-NEXT: bnez a3, .LBB61_940
+; CHECK-RV32-NEXT: j .LBB61_451
; CHECK-RV32-NEXT: .LBB61_940: # %cond.load1689
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8644,7 +9462,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_451
+; CHECK-RV32-NEXT: andi a3, a2, 256
+; CHECK-RV32-NEXT: bnez a3, .LBB61_941
+; CHECK-RV32-NEXT: j .LBB61_452
; CHECK-RV32-NEXT: .LBB61_941: # %cond.load1693
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8655,7 +9475,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_452
+; CHECK-RV32-NEXT: andi a3, a2, 512
+; CHECK-RV32-NEXT: bnez a3, .LBB61_942
+; CHECK-RV32-NEXT: j .LBB61_453
; CHECK-RV32-NEXT: .LBB61_942: # %cond.load1697
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8666,7 +9488,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_453
+; CHECK-RV32-NEXT: andi a3, a2, 1024
+; CHECK-RV32-NEXT: bnez a3, .LBB61_943
+; CHECK-RV32-NEXT: j .LBB61_454
; CHECK-RV32-NEXT: .LBB61_943: # %cond.load1701
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8677,7 +9501,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_454
+; CHECK-RV32-NEXT: slli a3, a2, 20
+; CHECK-RV32-NEXT: bltz a3, .LBB61_944
+; CHECK-RV32-NEXT: j .LBB61_455
; CHECK-RV32-NEXT: .LBB61_944: # %cond.load1705
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8688,7 +9514,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_455
+; CHECK-RV32-NEXT: slli a3, a2, 19
+; CHECK-RV32-NEXT: bltz a3, .LBB61_945
+; CHECK-RV32-NEXT: j .LBB61_456
; CHECK-RV32-NEXT: .LBB61_945: # %cond.load1709
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8699,7 +9527,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_456
+; CHECK-RV32-NEXT: slli a3, a2, 18
+; CHECK-RV32-NEXT: bltz a3, .LBB61_946
+; CHECK-RV32-NEXT: j .LBB61_457
; CHECK-RV32-NEXT: .LBB61_946: # %cond.load1713
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8710,7 +9540,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_457
+; CHECK-RV32-NEXT: slli a3, a2, 17
+; CHECK-RV32-NEXT: bltz a3, .LBB61_947
+; CHECK-RV32-NEXT: j .LBB61_458
; CHECK-RV32-NEXT: .LBB61_947: # %cond.load1717
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8721,7 +9553,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_458
+; CHECK-RV32-NEXT: slli a3, a2, 16
+; CHECK-RV32-NEXT: bltz a3, .LBB61_948
+; CHECK-RV32-NEXT: j .LBB61_459
; CHECK-RV32-NEXT: .LBB61_948: # %cond.load1721
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8732,7 +9566,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_459
+; CHECK-RV32-NEXT: slli a3, a2, 15
+; CHECK-RV32-NEXT: bltz a3, .LBB61_949
+; CHECK-RV32-NEXT: j .LBB61_460
; CHECK-RV32-NEXT: .LBB61_949: # %cond.load1725
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8743,7 +9579,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_460
+; CHECK-RV32-NEXT: slli a3, a2, 14
+; CHECK-RV32-NEXT: bltz a3, .LBB61_950
+; CHECK-RV32-NEXT: j .LBB61_461
; CHECK-RV32-NEXT: .LBB61_950: # %cond.load1729
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8754,7 +9592,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_461
+; CHECK-RV32-NEXT: slli a3, a2, 13
+; CHECK-RV32-NEXT: bltz a3, .LBB61_951
+; CHECK-RV32-NEXT: j .LBB61_462
; CHECK-RV32-NEXT: .LBB61_951: # %cond.load1733
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8765,7 +9605,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_462
+; CHECK-RV32-NEXT: slli a3, a2, 12
+; CHECK-RV32-NEXT: bltz a3, .LBB61_952
+; CHECK-RV32-NEXT: j .LBB61_463
; CHECK-RV32-NEXT: .LBB61_952: # %cond.load1737
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8776,7 +9618,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_463
+; CHECK-RV32-NEXT: slli a3, a2, 11
+; CHECK-RV32-NEXT: bltz a3, .LBB61_953
+; CHECK-RV32-NEXT: j .LBB61_464
; CHECK-RV32-NEXT: .LBB61_953: # %cond.load1741
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8787,7 +9631,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_464
+; CHECK-RV32-NEXT: slli a3, a2, 10
+; CHECK-RV32-NEXT: bltz a3, .LBB61_954
+; CHECK-RV32-NEXT: j .LBB61_465
; CHECK-RV32-NEXT: .LBB61_954: # %cond.load1745
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8798,7 +9644,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_465
+; CHECK-RV32-NEXT: slli a3, a2, 9
+; CHECK-RV32-NEXT: bltz a3, .LBB61_955
+; CHECK-RV32-NEXT: j .LBB61_466
; CHECK-RV32-NEXT: .LBB61_955: # %cond.load1749
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8809,7 +9657,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_466
+; CHECK-RV32-NEXT: slli a3, a2, 8
+; CHECK-RV32-NEXT: bltz a3, .LBB61_956
+; CHECK-RV32-NEXT: j .LBB61_467
; CHECK-RV32-NEXT: .LBB61_956: # %cond.load1753
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8820,7 +9670,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_467
+; CHECK-RV32-NEXT: slli a3, a2, 7
+; CHECK-RV32-NEXT: bltz a3, .LBB61_957
+; CHECK-RV32-NEXT: j .LBB61_468
; CHECK-RV32-NEXT: .LBB61_957: # %cond.load1757
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8831,7 +9683,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_468
+; CHECK-RV32-NEXT: slli a3, a2, 6
+; CHECK-RV32-NEXT: bltz a3, .LBB61_958
+; CHECK-RV32-NEXT: j .LBB61_469
; CHECK-RV32-NEXT: .LBB61_958: # %cond.load1761
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8842,7 +9696,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_469
+; CHECK-RV32-NEXT: slli a3, a2, 5
+; CHECK-RV32-NEXT: bltz a3, .LBB61_959
+; CHECK-RV32-NEXT: j .LBB61_470
; CHECK-RV32-NEXT: .LBB61_959: # %cond.load1765
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8853,7 +9709,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_470
+; CHECK-RV32-NEXT: slli a3, a2, 4
+; CHECK-RV32-NEXT: bltz a3, .LBB61_960
+; CHECK-RV32-NEXT: j .LBB61_471
; CHECK-RV32-NEXT: .LBB61_960: # %cond.load1769
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8864,7 +9722,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_471
+; CHECK-RV32-NEXT: slli a3, a2, 3
+; CHECK-RV32-NEXT: bltz a3, .LBB61_961
+; CHECK-RV32-NEXT: j .LBB61_472
; CHECK-RV32-NEXT: .LBB61_961: # %cond.load1773
; CHECK-RV32-NEXT: lbu a3, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8875,7 +9735,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_472
+; CHECK-RV32-NEXT: slli a3, a2, 2
+; CHECK-RV32-NEXT: bgez a3, .LBB61_1038
+; CHECK-RV32-NEXT: j .LBB61_473
+; CHECK-RV32-NEXT: .LBB61_1038: # %cond.load1773
+; CHECK-RV32-NEXT: j .LBB61_474
; CHECK-RV32-NEXT: .LBB61_962: # %cond.load1785
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vmv.s.x v16, a2
@@ -8884,7 +9748,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_477
+; CHECK-RV32-NEXT: andi a2, a3, 1
+; CHECK-RV32-NEXT: bnez a2, .LBB61_963
+; CHECK-RV32-NEXT: j .LBB61_478
; CHECK-RV32-NEXT: .LBB61_963: # %cond.load1789
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8895,7 +9761,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_478
+; CHECK-RV32-NEXT: andi a2, a3, 2
+; CHECK-RV32-NEXT: bnez a2, .LBB61_964
+; CHECK-RV32-NEXT: j .LBB61_479
; CHECK-RV32-NEXT: .LBB61_964: # %cond.load1793
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8906,7 +9774,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_479
+; CHECK-RV32-NEXT: andi a2, a3, 4
+; CHECK-RV32-NEXT: bnez a2, .LBB61_965
+; CHECK-RV32-NEXT: j .LBB61_480
; CHECK-RV32-NEXT: .LBB61_965: # %cond.load1797
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8917,7 +9787,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_480
+; CHECK-RV32-NEXT: andi a2, a3, 8
+; CHECK-RV32-NEXT: bnez a2, .LBB61_966
+; CHECK-RV32-NEXT: j .LBB61_481
; CHECK-RV32-NEXT: .LBB61_966: # %cond.load1801
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8928,7 +9800,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_481
+; CHECK-RV32-NEXT: andi a2, a3, 16
+; CHECK-RV32-NEXT: bnez a2, .LBB61_967
+; CHECK-RV32-NEXT: j .LBB61_482
; CHECK-RV32-NEXT: .LBB61_967: # %cond.load1805
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8939,7 +9813,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_482
+; CHECK-RV32-NEXT: andi a2, a3, 32
+; CHECK-RV32-NEXT: bnez a2, .LBB61_968
+; CHECK-RV32-NEXT: j .LBB61_483
; CHECK-RV32-NEXT: .LBB61_968: # %cond.load1809
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8950,7 +9826,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_483
+; CHECK-RV32-NEXT: andi a2, a3, 64
+; CHECK-RV32-NEXT: bnez a2, .LBB61_969
+; CHECK-RV32-NEXT: j .LBB61_484
; CHECK-RV32-NEXT: .LBB61_969: # %cond.load1813
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8961,7 +9839,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_484
+; CHECK-RV32-NEXT: andi a2, a3, 128
+; CHECK-RV32-NEXT: bnez a2, .LBB61_970
+; CHECK-RV32-NEXT: j .LBB61_485
; CHECK-RV32-NEXT: .LBB61_970: # %cond.load1817
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8972,7 +9852,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_485
+; CHECK-RV32-NEXT: andi a2, a3, 256
+; CHECK-RV32-NEXT: bnez a2, .LBB61_971
+; CHECK-RV32-NEXT: j .LBB61_486
; CHECK-RV32-NEXT: .LBB61_971: # %cond.load1821
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8983,7 +9865,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_486
+; CHECK-RV32-NEXT: andi a2, a3, 512
+; CHECK-RV32-NEXT: bnez a2, .LBB61_972
+; CHECK-RV32-NEXT: j .LBB61_487
; CHECK-RV32-NEXT: .LBB61_972: # %cond.load1825
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -8994,7 +9878,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_487
+; CHECK-RV32-NEXT: andi a2, a3, 1024
+; CHECK-RV32-NEXT: bnez a2, .LBB61_973
+; CHECK-RV32-NEXT: j .LBB61_488
; CHECK-RV32-NEXT: .LBB61_973: # %cond.load1829
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9005,7 +9891,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_488
+; CHECK-RV32-NEXT: slli a2, a3, 20
+; CHECK-RV32-NEXT: bltz a2, .LBB61_974
+; CHECK-RV32-NEXT: j .LBB61_489
; CHECK-RV32-NEXT: .LBB61_974: # %cond.load1833
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9016,7 +9904,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_489
+; CHECK-RV32-NEXT: slli a2, a3, 19
+; CHECK-RV32-NEXT: bltz a2, .LBB61_975
+; CHECK-RV32-NEXT: j .LBB61_490
; CHECK-RV32-NEXT: .LBB61_975: # %cond.load1837
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9027,7 +9917,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_490
+; CHECK-RV32-NEXT: slli a2, a3, 18
+; CHECK-RV32-NEXT: bltz a2, .LBB61_976
+; CHECK-RV32-NEXT: j .LBB61_491
; CHECK-RV32-NEXT: .LBB61_976: # %cond.load1841
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9038,7 +9930,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_491
+; CHECK-RV32-NEXT: slli a2, a3, 17
+; CHECK-RV32-NEXT: bltz a2, .LBB61_977
+; CHECK-RV32-NEXT: j .LBB61_492
; CHECK-RV32-NEXT: .LBB61_977: # %cond.load1845
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9049,7 +9943,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_492
+; CHECK-RV32-NEXT: slli a2, a3, 16
+; CHECK-RV32-NEXT: bltz a2, .LBB61_978
+; CHECK-RV32-NEXT: j .LBB61_493
; CHECK-RV32-NEXT: .LBB61_978: # %cond.load1849
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9060,7 +9956,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_493
+; CHECK-RV32-NEXT: slli a2, a3, 15
+; CHECK-RV32-NEXT: bltz a2, .LBB61_979
+; CHECK-RV32-NEXT: j .LBB61_494
; CHECK-RV32-NEXT: .LBB61_979: # %cond.load1853
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9071,8 +9969,10 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_494
-; CHECK-RV32-NEXT: .LBB61_980: # %cond.load1857
+; CHECK-RV32-NEXT: slli a2, a3, 14
+; CHECK-RV32-NEXT: bltz a2, .LBB61_980
+; CHECK-RV32-NEXT: j .LBB61_495
+; CHECK-RV32-NEXT: .LBB61_980: # %cond.load1857
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma
@@ -9082,7 +9982,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_495
+; CHECK-RV32-NEXT: slli a2, a3, 13
+; CHECK-RV32-NEXT: bltz a2, .LBB61_981
+; CHECK-RV32-NEXT: j .LBB61_496
; CHECK-RV32-NEXT: .LBB61_981: # %cond.load1861
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9093,7 +9995,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_496
+; CHECK-RV32-NEXT: slli a2, a3, 12
+; CHECK-RV32-NEXT: bltz a2, .LBB61_982
+; CHECK-RV32-NEXT: j .LBB61_497
; CHECK-RV32-NEXT: .LBB61_982: # %cond.load1865
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9104,7 +10008,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_497
+; CHECK-RV32-NEXT: slli a2, a3, 11
+; CHECK-RV32-NEXT: bltz a2, .LBB61_983
+; CHECK-RV32-NEXT: j .LBB61_498
; CHECK-RV32-NEXT: .LBB61_983: # %cond.load1869
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9115,7 +10021,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_498
+; CHECK-RV32-NEXT: slli a2, a3, 10
+; CHECK-RV32-NEXT: bltz a2, .LBB61_984
+; CHECK-RV32-NEXT: j .LBB61_499
; CHECK-RV32-NEXT: .LBB61_984: # %cond.load1873
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9126,7 +10034,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_499
+; CHECK-RV32-NEXT: slli a2, a3, 9
+; CHECK-RV32-NEXT: bltz a2, .LBB61_985
+; CHECK-RV32-NEXT: j .LBB61_500
; CHECK-RV32-NEXT: .LBB61_985: # %cond.load1877
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9137,7 +10047,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_500
+; CHECK-RV32-NEXT: slli a2, a3, 8
+; CHECK-RV32-NEXT: bltz a2, .LBB61_986
+; CHECK-RV32-NEXT: j .LBB61_501
; CHECK-RV32-NEXT: .LBB61_986: # %cond.load1881
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9148,7 +10060,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_501
+; CHECK-RV32-NEXT: slli a2, a3, 7
+; CHECK-RV32-NEXT: bltz a2, .LBB61_987
+; CHECK-RV32-NEXT: j .LBB61_502
; CHECK-RV32-NEXT: .LBB61_987: # %cond.load1885
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9159,7 +10073,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_502
+; CHECK-RV32-NEXT: slli a2, a3, 6
+; CHECK-RV32-NEXT: bltz a2, .LBB61_988
+; CHECK-RV32-NEXT: j .LBB61_503
; CHECK-RV32-NEXT: .LBB61_988: # %cond.load1889
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9170,7 +10086,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_503
+; CHECK-RV32-NEXT: slli a2, a3, 5
+; CHECK-RV32-NEXT: bltz a2, .LBB61_989
+; CHECK-RV32-NEXT: j .LBB61_504
; CHECK-RV32-NEXT: .LBB61_989: # %cond.load1893
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9181,7 +10099,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_504
+; CHECK-RV32-NEXT: slli a2, a3, 4
+; CHECK-RV32-NEXT: bltz a2, .LBB61_990
+; CHECK-RV32-NEXT: j .LBB61_505
; CHECK-RV32-NEXT: .LBB61_990: # %cond.load1897
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9192,7 +10112,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_505
+; CHECK-RV32-NEXT: slli a2, a3, 3
+; CHECK-RV32-NEXT: bltz a2, .LBB61_991
+; CHECK-RV32-NEXT: j .LBB61_506
; CHECK-RV32-NEXT: .LBB61_991: # %cond.load1901
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a4, 512
@@ -9203,7 +10125,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_506
+; CHECK-RV32-NEXT: slli a2, a3, 2
+; CHECK-RV32-NEXT: bgez a2, .LBB61_1039
+; CHECK-RV32-NEXT: j .LBB61_507
+; CHECK-RV32-NEXT: .LBB61_1039: # %cond.load1901
+; CHECK-RV32-NEXT: j .LBB61_508
; CHECK-RV32-NEXT: .LBB61_992: # %cond.load1913
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: vmv.s.x v16, a2
@@ -9212,7 +10138,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_511
+; CHECK-RV32-NEXT: andi a2, a1, 1
+; CHECK-RV32-NEXT: bnez a2, .LBB61_993
+; CHECK-RV32-NEXT: j .LBB61_512
; CHECK-RV32-NEXT: .LBB61_993: # %cond.load1917
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9223,7 +10151,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_512
+; CHECK-RV32-NEXT: andi a2, a1, 2
+; CHECK-RV32-NEXT: bnez a2, .LBB61_994
+; CHECK-RV32-NEXT: j .LBB61_513
; CHECK-RV32-NEXT: .LBB61_994: # %cond.load1921
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9234,7 +10164,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_513
+; CHECK-RV32-NEXT: andi a2, a1, 4
+; CHECK-RV32-NEXT: bnez a2, .LBB61_995
+; CHECK-RV32-NEXT: j .LBB61_514
; CHECK-RV32-NEXT: .LBB61_995: # %cond.load1925
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9245,7 +10177,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_514
+; CHECK-RV32-NEXT: andi a2, a1, 8
+; CHECK-RV32-NEXT: bnez a2, .LBB61_996
+; CHECK-RV32-NEXT: j .LBB61_515
; CHECK-RV32-NEXT: .LBB61_996: # %cond.load1929
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9256,7 +10190,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_515
+; CHECK-RV32-NEXT: andi a2, a1, 16
+; CHECK-RV32-NEXT: bnez a2, .LBB61_997
+; CHECK-RV32-NEXT: j .LBB61_516
; CHECK-RV32-NEXT: .LBB61_997: # %cond.load1933
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9267,7 +10203,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_516
+; CHECK-RV32-NEXT: andi a2, a1, 32
+; CHECK-RV32-NEXT: bnez a2, .LBB61_998
+; CHECK-RV32-NEXT: j .LBB61_517
; CHECK-RV32-NEXT: .LBB61_998: # %cond.load1937
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9278,7 +10216,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_517
+; CHECK-RV32-NEXT: andi a2, a1, 64
+; CHECK-RV32-NEXT: bnez a2, .LBB61_999
+; CHECK-RV32-NEXT: j .LBB61_518
; CHECK-RV32-NEXT: .LBB61_999: # %cond.load1941
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9289,7 +10229,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_518
+; CHECK-RV32-NEXT: andi a2, a1, 128
+; CHECK-RV32-NEXT: bnez a2, .LBB61_1000
+; CHECK-RV32-NEXT: j .LBB61_519
; CHECK-RV32-NEXT: .LBB61_1000: # %cond.load1945
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9300,7 +10242,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_519
+; CHECK-RV32-NEXT: andi a2, a1, 256
+; CHECK-RV32-NEXT: bnez a2, .LBB61_1001
+; CHECK-RV32-NEXT: j .LBB61_520
; CHECK-RV32-NEXT: .LBB61_1001: # %cond.load1949
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9311,7 +10255,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_520
+; CHECK-RV32-NEXT: andi a2, a1, 512
+; CHECK-RV32-NEXT: bnez a2, .LBB61_1002
+; CHECK-RV32-NEXT: j .LBB61_521
; CHECK-RV32-NEXT: .LBB61_1002: # %cond.load1953
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9322,7 +10268,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_521
+; CHECK-RV32-NEXT: andi a2, a1, 1024
+; CHECK-RV32-NEXT: bnez a2, .LBB61_1003
+; CHECK-RV32-NEXT: j .LBB61_522
; CHECK-RV32-NEXT: .LBB61_1003: # %cond.load1957
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9333,7 +10281,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_522
+; CHECK-RV32-NEXT: slli a2, a1, 20
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1004
+; CHECK-RV32-NEXT: j .LBB61_523
; CHECK-RV32-NEXT: .LBB61_1004: # %cond.load1961
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9344,7 +10294,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_523
+; CHECK-RV32-NEXT: slli a2, a1, 19
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1005
+; CHECK-RV32-NEXT: j .LBB61_524
; CHECK-RV32-NEXT: .LBB61_1005: # %cond.load1965
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9355,7 +10307,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_524
+; CHECK-RV32-NEXT: slli a2, a1, 18
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1006
+; CHECK-RV32-NEXT: j .LBB61_525
; CHECK-RV32-NEXT: .LBB61_1006: # %cond.load1969
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9366,7 +10320,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_525
+; CHECK-RV32-NEXT: slli a2, a1, 17
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1007
+; CHECK-RV32-NEXT: j .LBB61_526
; CHECK-RV32-NEXT: .LBB61_1007: # %cond.load1973
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9377,7 +10333,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_526
+; CHECK-RV32-NEXT: slli a2, a1, 16
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1008
+; CHECK-RV32-NEXT: j .LBB61_527
; CHECK-RV32-NEXT: .LBB61_1008: # %cond.load1977
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9388,7 +10346,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_527
+; CHECK-RV32-NEXT: slli a2, a1, 15
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1009
+; CHECK-RV32-NEXT: j .LBB61_528
; CHECK-RV32-NEXT: .LBB61_1009: # %cond.load1981
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9399,7 +10359,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_528
+; CHECK-RV32-NEXT: slli a2, a1, 14
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1010
+; CHECK-RV32-NEXT: j .LBB61_529
; CHECK-RV32-NEXT: .LBB61_1010: # %cond.load1985
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9410,7 +10372,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_529
+; CHECK-RV32-NEXT: slli a2, a1, 13
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1011
+; CHECK-RV32-NEXT: j .LBB61_530
; CHECK-RV32-NEXT: .LBB61_1011: # %cond.load1989
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9421,7 +10385,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_530
+; CHECK-RV32-NEXT: slli a2, a1, 12
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1012
+; CHECK-RV32-NEXT: j .LBB61_531
; CHECK-RV32-NEXT: .LBB61_1012: # %cond.load1993
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9432,7 +10398,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_531
+; CHECK-RV32-NEXT: slli a2, a1, 11
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1013
+; CHECK-RV32-NEXT: j .LBB61_532
; CHECK-RV32-NEXT: .LBB61_1013: # %cond.load1997
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9443,7 +10411,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_532
+; CHECK-RV32-NEXT: slli a2, a1, 10
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1014
+; CHECK-RV32-NEXT: j .LBB61_533
; CHECK-RV32-NEXT: .LBB61_1014: # %cond.load2001
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9454,7 +10424,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_533
+; CHECK-RV32-NEXT: slli a2, a1, 9
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1015
+; CHECK-RV32-NEXT: j .LBB61_534
; CHECK-RV32-NEXT: .LBB61_1015: # %cond.load2005
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9465,7 +10437,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_534
+; CHECK-RV32-NEXT: slli a2, a1, 8
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1016
+; CHECK-RV32-NEXT: j .LBB61_535
; CHECK-RV32-NEXT: .LBB61_1016: # %cond.load2009
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9476,7 +10450,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_535
+; CHECK-RV32-NEXT: slli a2, a1, 7
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1017
+; CHECK-RV32-NEXT: j .LBB61_536
; CHECK-RV32-NEXT: .LBB61_1017: # %cond.load2013
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9487,7 +10463,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_536
+; CHECK-RV32-NEXT: slli a2, a1, 6
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1018
+; CHECK-RV32-NEXT: j .LBB61_537
; CHECK-RV32-NEXT: .LBB61_1018: # %cond.load2017
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9498,7 +10476,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_537
+; CHECK-RV32-NEXT: slli a2, a1, 5
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1019
+; CHECK-RV32-NEXT: j .LBB61_538
; CHECK-RV32-NEXT: .LBB61_1019: # %cond.load2021
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9509,7 +10489,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_538
+; CHECK-RV32-NEXT: slli a2, a1, 4
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1020
+; CHECK-RV32-NEXT: j .LBB61_539
; CHECK-RV32-NEXT: .LBB61_1020: # %cond.load2025
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9520,7 +10502,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_539
+; CHECK-RV32-NEXT: slli a2, a1, 3
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1021
+; CHECK-RV32-NEXT: j .LBB61_540
; CHECK-RV32-NEXT: .LBB61_1021: # %cond.load2029
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9531,7 +10515,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_540
+; CHECK-RV32-NEXT: slli a2, a1, 2
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1022
+; CHECK-RV32-NEXT: j .LBB61_541
; CHECK-RV32-NEXT: .LBB61_1022: # %cond.load2033
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9542,7 +10528,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV32-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV32-NEXT: addi a0, a0, 1
-; CHECK-RV32-NEXT: j .LBB61_541
+; CHECK-RV32-NEXT: slli a2, a1, 1
+; CHECK-RV32-NEXT: bltz a2, .LBB61_1023
+; CHECK-RV32-NEXT: j .LBB61_542
; CHECK-RV32-NEXT: .LBB61_1023: # %cond.load2037
; CHECK-RV32-NEXT: lbu a2, 0(a0)
; CHECK-RV32-NEXT: li a3, 512
@@ -9814,7 +10802,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_61: # %else238
; CHECK-RV64-NEXT: slli a1, a2, 2
; CHECK-RV64-NEXT: bgez a1, .LBB61_63
-; CHECK-RV64-NEXT: # %bb.62: # %cond.load241
+; CHECK-RV64-NEXT: .LBB61_62: # %cond.load241
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV64-NEXT: vmv8r.v v16, v8
@@ -10094,7 +11082,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_127: # %else494
; CHECK-RV64-NEXT: slli a2, a1, 2
; CHECK-RV64-NEXT: bgez a2, .LBB61_129
-; CHECK-RV64-NEXT: # %bb.128: # %cond.load497
+; CHECK-RV64-NEXT: .LBB61_128: # %cond.load497
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV64-NEXT: vmv8r.v v16, v8
@@ -10374,7 +11362,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_193: # %else750
; CHECK-RV64-NEXT: slli a1, a2, 2
; CHECK-RV64-NEXT: bgez a1, .LBB61_195
-; CHECK-RV64-NEXT: # %bb.194: # %cond.load753
+; CHECK-RV64-NEXT: .LBB61_194: # %cond.load753
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV64-NEXT: vmv8r.v v16, v8
@@ -10654,7 +11642,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_259: # %else1006
; CHECK-RV64-NEXT: slli a2, a1, 2
; CHECK-RV64-NEXT: bgez a2, .LBB61_261
-; CHECK-RV64-NEXT: # %bb.260: # %cond.load1009
+; CHECK-RV64-NEXT: .LBB61_260: # %cond.load1009
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV64-NEXT: vmv8r.v v16, v8
@@ -10934,7 +11922,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_325: # %else1262
; CHECK-RV64-NEXT: slli a1, a2, 2
; CHECK-RV64-NEXT: bgez a1, .LBB61_327
-; CHECK-RV64-NEXT: # %bb.326: # %cond.load1265
+; CHECK-RV64-NEXT: .LBB61_326: # %cond.load1265
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma
@@ -11209,7 +12197,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_391: # %else1518
; CHECK-RV64-NEXT: slli a2, a1, 2
; CHECK-RV64-NEXT: bgez a2, .LBB61_393
-; CHECK-RV64-NEXT: # %bb.392: # %cond.load1521
+; CHECK-RV64-NEXT: .LBB61_392: # %cond.load1521
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma
@@ -11484,7 +12472,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: .LBB61_457: # %else1774
; CHECK-RV64-NEXT: slli a1, a2, 2
; CHECK-RV64-NEXT: bgez a1, .LBB61_459
-; CHECK-RV64-NEXT: # %bb.458: # %cond.load1777
+; CHECK-RV64-NEXT: .LBB61_458: # %cond.load1777
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma
@@ -11777,7 +12765,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_1
+; CHECK-RV64-NEXT: andi a1, a2, 2
+; CHECK-RV64-NEXT: bnez a1, .LBB61_528
+; CHECK-RV64-NEXT: j .LBB61_2
; CHECK-RV64-NEXT: .LBB61_528: # %cond.load1
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 2, e8, m1, tu, ma
@@ -11787,7 +12777,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_2
+; CHECK-RV64-NEXT: andi a1, a2, 4
+; CHECK-RV64-NEXT: bnez a1, .LBB61_529
+; CHECK-RV64-NEXT: j .LBB61_3
; CHECK-RV64-NEXT: .LBB61_529: # %cond.load5
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 3, e8, m1, tu, ma
@@ -11797,7 +12789,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_3
+; CHECK-RV64-NEXT: andi a1, a2, 8
+; CHECK-RV64-NEXT: bnez a1, .LBB61_530
+; CHECK-RV64-NEXT: j .LBB61_4
; CHECK-RV64-NEXT: .LBB61_530: # %cond.load9
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 4, e8, m1, tu, ma
@@ -11807,7 +12801,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_4
+; CHECK-RV64-NEXT: andi a1, a2, 16
+; CHECK-RV64-NEXT: bnez a1, .LBB61_531
+; CHECK-RV64-NEXT: j .LBB61_5
; CHECK-RV64-NEXT: .LBB61_531: # %cond.load13
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 5, e8, m1, tu, ma
@@ -11817,7 +12813,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_5
+; CHECK-RV64-NEXT: andi a1, a2, 32
+; CHECK-RV64-NEXT: bnez a1, .LBB61_532
+; CHECK-RV64-NEXT: j .LBB61_6
; CHECK-RV64-NEXT: .LBB61_532: # %cond.load17
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 6, e8, m1, tu, ma
@@ -11827,7 +12825,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_6
+; CHECK-RV64-NEXT: andi a1, a2, 64
+; CHECK-RV64-NEXT: bnez a1, .LBB61_533
+; CHECK-RV64-NEXT: j .LBB61_7
; CHECK-RV64-NEXT: .LBB61_533: # %cond.load21
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 7, e8, m1, tu, ma
@@ -11837,7 +12837,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_7
+; CHECK-RV64-NEXT: andi a1, a2, 128
+; CHECK-RV64-NEXT: bnez a1, .LBB61_534
+; CHECK-RV64-NEXT: j .LBB61_8
; CHECK-RV64-NEXT: .LBB61_534: # %cond.load25
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 8, e8, m1, tu, ma
@@ -11847,7 +12849,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_8
+; CHECK-RV64-NEXT: andi a1, a2, 256
+; CHECK-RV64-NEXT: bnez a1, .LBB61_535
+; CHECK-RV64-NEXT: j .LBB61_9
; CHECK-RV64-NEXT: .LBB61_535: # %cond.load29
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 9, e8, m1, tu, ma
@@ -11857,7 +12861,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_9
+; CHECK-RV64-NEXT: andi a1, a2, 512
+; CHECK-RV64-NEXT: bnez a1, .LBB61_536
+; CHECK-RV64-NEXT: j .LBB61_10
; CHECK-RV64-NEXT: .LBB61_536: # %cond.load33
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 10, e8, m1, tu, ma
@@ -11867,7 +12873,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_10
+; CHECK-RV64-NEXT: andi a1, a2, 1024
+; CHECK-RV64-NEXT: bnez a1, .LBB61_537
+; CHECK-RV64-NEXT: j .LBB61_11
; CHECK-RV64-NEXT: .LBB61_537: # %cond.load37
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 11, e8, m1, tu, ma
@@ -11877,7 +12885,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_11
+; CHECK-RV64-NEXT: slli a1, a2, 52
+; CHECK-RV64-NEXT: bltz a1, .LBB61_538
+; CHECK-RV64-NEXT: j .LBB61_12
; CHECK-RV64-NEXT: .LBB61_538: # %cond.load41
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 12, e8, m1, tu, ma
@@ -11887,7 +12897,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_12
+; CHECK-RV64-NEXT: slli a1, a2, 51
+; CHECK-RV64-NEXT: bltz a1, .LBB61_539
+; CHECK-RV64-NEXT: j .LBB61_13
; CHECK-RV64-NEXT: .LBB61_539: # %cond.load45
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 13, e8, m1, tu, ma
@@ -11897,7 +12909,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_13
+; CHECK-RV64-NEXT: slli a1, a2, 50
+; CHECK-RV64-NEXT: bltz a1, .LBB61_540
+; CHECK-RV64-NEXT: j .LBB61_14
; CHECK-RV64-NEXT: .LBB61_540: # %cond.load49
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 14, e8, m1, tu, ma
@@ -11907,7 +12921,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_14
+; CHECK-RV64-NEXT: slli a1, a2, 49
+; CHECK-RV64-NEXT: bltz a1, .LBB61_541
+; CHECK-RV64-NEXT: j .LBB61_15
; CHECK-RV64-NEXT: .LBB61_541: # %cond.load53
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 15, e8, m1, tu, ma
@@ -11917,7 +12933,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_15
+; CHECK-RV64-NEXT: slli a1, a2, 48
+; CHECK-RV64-NEXT: bltz a1, .LBB61_542
+; CHECK-RV64-NEXT: j .LBB61_16
; CHECK-RV64-NEXT: .LBB61_542: # %cond.load57
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 16, e8, m1, tu, ma
@@ -11927,7 +12945,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_16
+; CHECK-RV64-NEXT: slli a1, a2, 47
+; CHECK-RV64-NEXT: bltz a1, .LBB61_543
+; CHECK-RV64-NEXT: j .LBB61_17
; CHECK-RV64-NEXT: .LBB61_543: # %cond.load61
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 17, e8, m1, tu, ma
@@ -11937,7 +12957,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_17
+; CHECK-RV64-NEXT: slli a1, a2, 46
+; CHECK-RV64-NEXT: bltz a1, .LBB61_544
+; CHECK-RV64-NEXT: j .LBB61_18
; CHECK-RV64-NEXT: .LBB61_544: # %cond.load65
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 18, e8, m1, tu, ma
@@ -11947,7 +12969,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_18
+; CHECK-RV64-NEXT: slli a1, a2, 45
+; CHECK-RV64-NEXT: bltz a1, .LBB61_545
+; CHECK-RV64-NEXT: j .LBB61_19
; CHECK-RV64-NEXT: .LBB61_545: # %cond.load69
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 19, e8, m1, tu, ma
@@ -11957,7 +12981,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_19
+; CHECK-RV64-NEXT: slli a1, a2, 44
+; CHECK-RV64-NEXT: bltz a1, .LBB61_546
+; CHECK-RV64-NEXT: j .LBB61_20
; CHECK-RV64-NEXT: .LBB61_546: # %cond.load73
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 20, e8, m1, tu, ma
@@ -11967,7 +12993,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_20
+; CHECK-RV64-NEXT: slli a1, a2, 43
+; CHECK-RV64-NEXT: bltz a1, .LBB61_547
+; CHECK-RV64-NEXT: j .LBB61_21
; CHECK-RV64-NEXT: .LBB61_547: # %cond.load77
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 21, e8, m1, tu, ma
@@ -11977,7 +13005,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_21
+; CHECK-RV64-NEXT: slli a1, a2, 42
+; CHECK-RV64-NEXT: bltz a1, .LBB61_548
+; CHECK-RV64-NEXT: j .LBB61_22
; CHECK-RV64-NEXT: .LBB61_548: # %cond.load81
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 22, e8, m1, tu, ma
@@ -11987,7 +13017,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_22
+; CHECK-RV64-NEXT: slli a1, a2, 41
+; CHECK-RV64-NEXT: bltz a1, .LBB61_549
+; CHECK-RV64-NEXT: j .LBB61_23
; CHECK-RV64-NEXT: .LBB61_549: # %cond.load85
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 23, e8, m1, tu, ma
@@ -11997,7 +13029,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_23
+; CHECK-RV64-NEXT: slli a1, a2, 40
+; CHECK-RV64-NEXT: bltz a1, .LBB61_550
+; CHECK-RV64-NEXT: j .LBB61_24
; CHECK-RV64-NEXT: .LBB61_550: # %cond.load89
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 24, e8, m1, tu, ma
@@ -12007,7 +13041,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_24
+; CHECK-RV64-NEXT: slli a1, a2, 39
+; CHECK-RV64-NEXT: bltz a1, .LBB61_551
+; CHECK-RV64-NEXT: j .LBB61_25
; CHECK-RV64-NEXT: .LBB61_551: # %cond.load93
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 25, e8, m1, tu, ma
@@ -12017,7 +13053,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_25
+; CHECK-RV64-NEXT: slli a1, a2, 38
+; CHECK-RV64-NEXT: bltz a1, .LBB61_552
+; CHECK-RV64-NEXT: j .LBB61_26
; CHECK-RV64-NEXT: .LBB61_552: # %cond.load97
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 26, e8, m1, tu, ma
@@ -12027,7 +13065,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_26
+; CHECK-RV64-NEXT: slli a1, a2, 37
+; CHECK-RV64-NEXT: bltz a1, .LBB61_553
+; CHECK-RV64-NEXT: j .LBB61_27
; CHECK-RV64-NEXT: .LBB61_553: # %cond.load101
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 27, e8, m1, tu, ma
@@ -12037,7 +13077,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_27
+; CHECK-RV64-NEXT: slli a1, a2, 36
+; CHECK-RV64-NEXT: bltz a1, .LBB61_554
+; CHECK-RV64-NEXT: j .LBB61_28
; CHECK-RV64-NEXT: .LBB61_554: # %cond.load105
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 28, e8, m1, tu, ma
@@ -12047,7 +13089,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_28
+; CHECK-RV64-NEXT: slli a1, a2, 35
+; CHECK-RV64-NEXT: bltz a1, .LBB61_555
+; CHECK-RV64-NEXT: j .LBB61_29
; CHECK-RV64-NEXT: .LBB61_555: # %cond.load109
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 29, e8, m1, tu, ma
@@ -12057,7 +13101,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_29
+; CHECK-RV64-NEXT: slli a1, a2, 34
+; CHECK-RV64-NEXT: bltz a1, .LBB61_556
+; CHECK-RV64-NEXT: j .LBB61_30
; CHECK-RV64-NEXT: .LBB61_556: # %cond.load113
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 30, e8, m1, tu, ma
@@ -12067,7 +13113,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_30
+; CHECK-RV64-NEXT: slli a1, a2, 33
+; CHECK-RV64-NEXT: bltz a1, .LBB61_557
+; CHECK-RV64-NEXT: j .LBB61_31
; CHECK-RV64-NEXT: .LBB61_557: # %cond.load117
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 31, e8, m1, tu, ma
@@ -12077,7 +13125,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_31
+; CHECK-RV64-NEXT: slli a1, a2, 32
+; CHECK-RV64-NEXT: bltz a1, .LBB61_558
+; CHECK-RV64-NEXT: j .LBB61_32
; CHECK-RV64-NEXT: .LBB61_558: # %cond.load121
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12089,7 +13139,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_32
+; CHECK-RV64-NEXT: slli a1, a2, 31
+; CHECK-RV64-NEXT: bltz a1, .LBB61_559
+; CHECK-RV64-NEXT: j .LBB61_33
; CHECK-RV64-NEXT: .LBB61_559: # %cond.load125
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12102,7 +13154,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_33
+; CHECK-RV64-NEXT: slli a1, a2, 30
+; CHECK-RV64-NEXT: bltz a1, .LBB61_560
+; CHECK-RV64-NEXT: j .LBB61_34
; CHECK-RV64-NEXT: .LBB61_560: # %cond.load129
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12115,7 +13169,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_34
+; CHECK-RV64-NEXT: slli a1, a2, 29
+; CHECK-RV64-NEXT: bltz a1, .LBB61_561
+; CHECK-RV64-NEXT: j .LBB61_35
; CHECK-RV64-NEXT: .LBB61_561: # %cond.load133
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12128,7 +13184,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_35
+; CHECK-RV64-NEXT: slli a1, a2, 28
+; CHECK-RV64-NEXT: bltz a1, .LBB61_562
+; CHECK-RV64-NEXT: j .LBB61_36
; CHECK-RV64-NEXT: .LBB61_562: # %cond.load137
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12141,7 +13199,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_36
+; CHECK-RV64-NEXT: slli a1, a2, 27
+; CHECK-RV64-NEXT: bltz a1, .LBB61_563
+; CHECK-RV64-NEXT: j .LBB61_37
; CHECK-RV64-NEXT: .LBB61_563: # %cond.load141
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12154,7 +13214,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_37
+; CHECK-RV64-NEXT: slli a1, a2, 26
+; CHECK-RV64-NEXT: bltz a1, .LBB61_564
+; CHECK-RV64-NEXT: j .LBB61_38
; CHECK-RV64-NEXT: .LBB61_564: # %cond.load145
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12167,7 +13229,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_38
+; CHECK-RV64-NEXT: slli a1, a2, 25
+; CHECK-RV64-NEXT: bltz a1, .LBB61_565
+; CHECK-RV64-NEXT: j .LBB61_39
; CHECK-RV64-NEXT: .LBB61_565: # %cond.load149
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12180,7 +13244,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_39
+; CHECK-RV64-NEXT: slli a1, a2, 24
+; CHECK-RV64-NEXT: bltz a1, .LBB61_566
+; CHECK-RV64-NEXT: j .LBB61_40
; CHECK-RV64-NEXT: .LBB61_566: # %cond.load153
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12193,7 +13259,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_40
+; CHECK-RV64-NEXT: slli a1, a2, 23
+; CHECK-RV64-NEXT: bltz a1, .LBB61_567
+; CHECK-RV64-NEXT: j .LBB61_41
; CHECK-RV64-NEXT: .LBB61_567: # %cond.load157
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12206,7 +13274,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_41
+; CHECK-RV64-NEXT: slli a1, a2, 22
+; CHECK-RV64-NEXT: bltz a1, .LBB61_568
+; CHECK-RV64-NEXT: j .LBB61_42
; CHECK-RV64-NEXT: .LBB61_568: # %cond.load161
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12219,7 +13289,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_42
+; CHECK-RV64-NEXT: slli a1, a2, 21
+; CHECK-RV64-NEXT: bltz a1, .LBB61_569
+; CHECK-RV64-NEXT: j .LBB61_43
; CHECK-RV64-NEXT: .LBB61_569: # %cond.load165
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12232,8 +13304,10 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_43
-; CHECK-RV64-NEXT: .LBB61_570: # %cond.load169
+; CHECK-RV64-NEXT: slli a1, a2, 20
+; CHECK-RV64-NEXT: bltz a1, .LBB61_570
+; CHECK-RV64-NEXT: j .LBB61_44
+; CHECK-RV64-NEXT: .LBB61_570: # %cond.load169
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV64-NEXT: vmv8r.v v16, v8
@@ -12245,7 +13319,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_44
+; CHECK-RV64-NEXT: slli a1, a2, 19
+; CHECK-RV64-NEXT: bltz a1, .LBB61_571
+; CHECK-RV64-NEXT: j .LBB61_45
; CHECK-RV64-NEXT: .LBB61_571: # %cond.load173
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12258,7 +13334,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_45
+; CHECK-RV64-NEXT: slli a1, a2, 18
+; CHECK-RV64-NEXT: bltz a1, .LBB61_572
+; CHECK-RV64-NEXT: j .LBB61_46
; CHECK-RV64-NEXT: .LBB61_572: # %cond.load177
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12271,7 +13349,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_46
+; CHECK-RV64-NEXT: slli a1, a2, 17
+; CHECK-RV64-NEXT: bltz a1, .LBB61_573
+; CHECK-RV64-NEXT: j .LBB61_47
; CHECK-RV64-NEXT: .LBB61_573: # %cond.load181
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12284,7 +13364,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_47
+; CHECK-RV64-NEXT: slli a1, a2, 16
+; CHECK-RV64-NEXT: bltz a1, .LBB61_574
+; CHECK-RV64-NEXT: j .LBB61_48
; CHECK-RV64-NEXT: .LBB61_574: # %cond.load185
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12297,7 +13379,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_48
+; CHECK-RV64-NEXT: slli a1, a2, 15
+; CHECK-RV64-NEXT: bltz a1, .LBB61_575
+; CHECK-RV64-NEXT: j .LBB61_49
; CHECK-RV64-NEXT: .LBB61_575: # %cond.load189
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12310,7 +13394,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_49
+; CHECK-RV64-NEXT: slli a1, a2, 14
+; CHECK-RV64-NEXT: bltz a1, .LBB61_576
+; CHECK-RV64-NEXT: j .LBB61_50
; CHECK-RV64-NEXT: .LBB61_576: # %cond.load193
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12323,7 +13409,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_50
+; CHECK-RV64-NEXT: slli a1, a2, 13
+; CHECK-RV64-NEXT: bltz a1, .LBB61_577
+; CHECK-RV64-NEXT: j .LBB61_51
; CHECK-RV64-NEXT: .LBB61_577: # %cond.load197
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12336,7 +13424,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_51
+; CHECK-RV64-NEXT: slli a1, a2, 12
+; CHECK-RV64-NEXT: bltz a1, .LBB61_578
+; CHECK-RV64-NEXT: j .LBB61_52
; CHECK-RV64-NEXT: .LBB61_578: # %cond.load201
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12349,7 +13439,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_52
+; CHECK-RV64-NEXT: slli a1, a2, 11
+; CHECK-RV64-NEXT: bltz a1, .LBB61_579
+; CHECK-RV64-NEXT: j .LBB61_53
; CHECK-RV64-NEXT: .LBB61_579: # %cond.load205
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12362,7 +13454,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_53
+; CHECK-RV64-NEXT: slli a1, a2, 10
+; CHECK-RV64-NEXT: bltz a1, .LBB61_580
+; CHECK-RV64-NEXT: j .LBB61_54
; CHECK-RV64-NEXT: .LBB61_580: # %cond.load209
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12375,7 +13469,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_54
+; CHECK-RV64-NEXT: slli a1, a2, 9
+; CHECK-RV64-NEXT: bltz a1, .LBB61_581
+; CHECK-RV64-NEXT: j .LBB61_55
; CHECK-RV64-NEXT: .LBB61_581: # %cond.load213
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12388,7 +13484,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_55
+; CHECK-RV64-NEXT: slli a1, a2, 8
+; CHECK-RV64-NEXT: bltz a1, .LBB61_582
+; CHECK-RV64-NEXT: j .LBB61_56
; CHECK-RV64-NEXT: .LBB61_582: # %cond.load217
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12401,7 +13499,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_56
+; CHECK-RV64-NEXT: slli a1, a2, 7
+; CHECK-RV64-NEXT: bltz a1, .LBB61_583
+; CHECK-RV64-NEXT: j .LBB61_57
; CHECK-RV64-NEXT: .LBB61_583: # %cond.load221
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12414,7 +13514,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_57
+; CHECK-RV64-NEXT: slli a1, a2, 6
+; CHECK-RV64-NEXT: bltz a1, .LBB61_584
+; CHECK-RV64-NEXT: j .LBB61_58
; CHECK-RV64-NEXT: .LBB61_584: # %cond.load225
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12427,7 +13529,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_58
+; CHECK-RV64-NEXT: slli a1, a2, 5
+; CHECK-RV64-NEXT: bltz a1, .LBB61_585
+; CHECK-RV64-NEXT: j .LBB61_59
; CHECK-RV64-NEXT: .LBB61_585: # %cond.load229
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12440,7 +13544,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_59
+; CHECK-RV64-NEXT: slli a1, a2, 4
+; CHECK-RV64-NEXT: bltz a1, .LBB61_586
+; CHECK-RV64-NEXT: j .LBB61_60
; CHECK-RV64-NEXT: .LBB61_586: # %cond.load233
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12453,7 +13559,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_60
+; CHECK-RV64-NEXT: slli a1, a2, 3
+; CHECK-RV64-NEXT: bltz a1, .LBB61_587
+; CHECK-RV64-NEXT: j .LBB61_61
; CHECK-RV64-NEXT: .LBB61_587: # %cond.load237
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12466,7 +13574,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_61
+; CHECK-RV64-NEXT: slli a1, a2, 2
+; CHECK-RV64-NEXT: bgez a1, .LBB61_1025
+; CHECK-RV64-NEXT: j .LBB61_62
+; CHECK-RV64-NEXT: .LBB61_1025: # %cond.load237
+; CHECK-RV64-NEXT: j .LBB61_63
; CHECK-RV64-NEXT: .LBB61_588: # %cond.load249
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vmv8r.v v16, v8
@@ -12478,7 +13590,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv1r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_66
+; CHECK-RV64-NEXT: andi a2, a1, 1
+; CHECK-RV64-NEXT: bnez a2, .LBB61_589
+; CHECK-RV64-NEXT: j .LBB61_67
; CHECK-RV64-NEXT: .LBB61_589: # %cond.load253
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12491,7 +13605,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_67
+; CHECK-RV64-NEXT: andi a2, a1, 2
+; CHECK-RV64-NEXT: bnez a2, .LBB61_590
+; CHECK-RV64-NEXT: j .LBB61_68
; CHECK-RV64-NEXT: .LBB61_590: # %cond.load257
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12504,7 +13620,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_68
+; CHECK-RV64-NEXT: andi a2, a1, 4
+; CHECK-RV64-NEXT: bnez a2, .LBB61_591
+; CHECK-RV64-NEXT: j .LBB61_69
; CHECK-RV64-NEXT: .LBB61_591: # %cond.load261
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12517,7 +13635,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_69
+; CHECK-RV64-NEXT: andi a2, a1, 8
+; CHECK-RV64-NEXT: bnez a2, .LBB61_592
+; CHECK-RV64-NEXT: j .LBB61_70
; CHECK-RV64-NEXT: .LBB61_592: # %cond.load265
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12530,7 +13650,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_70
+; CHECK-RV64-NEXT: andi a2, a1, 16
+; CHECK-RV64-NEXT: bnez a2, .LBB61_593
+; CHECK-RV64-NEXT: j .LBB61_71
; CHECK-RV64-NEXT: .LBB61_593: # %cond.load269
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12543,7 +13665,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_71
+; CHECK-RV64-NEXT: andi a2, a1, 32
+; CHECK-RV64-NEXT: bnez a2, .LBB61_594
+; CHECK-RV64-NEXT: j .LBB61_72
; CHECK-RV64-NEXT: .LBB61_594: # %cond.load273
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12556,7 +13680,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_72
+; CHECK-RV64-NEXT: andi a2, a1, 64
+; CHECK-RV64-NEXT: bnez a2, .LBB61_595
+; CHECK-RV64-NEXT: j .LBB61_73
; CHECK-RV64-NEXT: .LBB61_595: # %cond.load277
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12569,7 +13695,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_73
+; CHECK-RV64-NEXT: andi a2, a1, 128
+; CHECK-RV64-NEXT: bnez a2, .LBB61_596
+; CHECK-RV64-NEXT: j .LBB61_74
; CHECK-RV64-NEXT: .LBB61_596: # %cond.load281
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12582,7 +13710,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_74
+; CHECK-RV64-NEXT: andi a2, a1, 256
+; CHECK-RV64-NEXT: bnez a2, .LBB61_597
+; CHECK-RV64-NEXT: j .LBB61_75
; CHECK-RV64-NEXT: .LBB61_597: # %cond.load285
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12595,7 +13725,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_75
+; CHECK-RV64-NEXT: andi a2, a1, 512
+; CHECK-RV64-NEXT: bnez a2, .LBB61_598
+; CHECK-RV64-NEXT: j .LBB61_76
; CHECK-RV64-NEXT: .LBB61_598: # %cond.load289
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12608,7 +13740,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_76
+; CHECK-RV64-NEXT: andi a2, a1, 1024
+; CHECK-RV64-NEXT: bnez a2, .LBB61_599
+; CHECK-RV64-NEXT: j .LBB61_77
; CHECK-RV64-NEXT: .LBB61_599: # %cond.load293
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12621,7 +13755,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_77
+; CHECK-RV64-NEXT: slli a2, a1, 52
+; CHECK-RV64-NEXT: bltz a2, .LBB61_600
+; CHECK-RV64-NEXT: j .LBB61_78
; CHECK-RV64-NEXT: .LBB61_600: # %cond.load297
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12634,7 +13770,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_78
+; CHECK-RV64-NEXT: slli a2, a1, 51
+; CHECK-RV64-NEXT: bltz a2, .LBB61_601
+; CHECK-RV64-NEXT: j .LBB61_79
; CHECK-RV64-NEXT: .LBB61_601: # %cond.load301
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12647,7 +13785,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_79
+; CHECK-RV64-NEXT: slli a2, a1, 50
+; CHECK-RV64-NEXT: bltz a2, .LBB61_602
+; CHECK-RV64-NEXT: j .LBB61_80
; CHECK-RV64-NEXT: .LBB61_602: # %cond.load305
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12660,7 +13800,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_80
+; CHECK-RV64-NEXT: slli a2, a1, 49
+; CHECK-RV64-NEXT: bltz a2, .LBB61_603
+; CHECK-RV64-NEXT: j .LBB61_81
; CHECK-RV64-NEXT: .LBB61_603: # %cond.load309
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12673,7 +13815,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_81
+; CHECK-RV64-NEXT: slli a2, a1, 48
+; CHECK-RV64-NEXT: bltz a2, .LBB61_604
+; CHECK-RV64-NEXT: j .LBB61_82
; CHECK-RV64-NEXT: .LBB61_604: # %cond.load313
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12686,7 +13830,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_82
+; CHECK-RV64-NEXT: slli a2, a1, 47
+; CHECK-RV64-NEXT: bltz a2, .LBB61_605
+; CHECK-RV64-NEXT: j .LBB61_83
; CHECK-RV64-NEXT: .LBB61_605: # %cond.load317
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12699,7 +13845,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_83
+; CHECK-RV64-NEXT: slli a2, a1, 46
+; CHECK-RV64-NEXT: bltz a2, .LBB61_606
+; CHECK-RV64-NEXT: j .LBB61_84
; CHECK-RV64-NEXT: .LBB61_606: # %cond.load321
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12712,7 +13860,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_84
+; CHECK-RV64-NEXT: slli a2, a1, 45
+; CHECK-RV64-NEXT: bltz a2, .LBB61_607
+; CHECK-RV64-NEXT: j .LBB61_85
; CHECK-RV64-NEXT: .LBB61_607: # %cond.load325
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12725,7 +13875,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_85
+; CHECK-RV64-NEXT: slli a2, a1, 44
+; CHECK-RV64-NEXT: bltz a2, .LBB61_608
+; CHECK-RV64-NEXT: j .LBB61_86
; CHECK-RV64-NEXT: .LBB61_608: # %cond.load329
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12738,7 +13890,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_86
+; CHECK-RV64-NEXT: slli a2, a1, 43
+; CHECK-RV64-NEXT: bltz a2, .LBB61_609
+; CHECK-RV64-NEXT: j .LBB61_87
; CHECK-RV64-NEXT: .LBB61_609: # %cond.load333
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12751,7 +13905,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_87
+; CHECK-RV64-NEXT: slli a2, a1, 42
+; CHECK-RV64-NEXT: bltz a2, .LBB61_610
+; CHECK-RV64-NEXT: j .LBB61_88
; CHECK-RV64-NEXT: .LBB61_610: # %cond.load337
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12764,7 +13920,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_88
+; CHECK-RV64-NEXT: slli a2, a1, 41
+; CHECK-RV64-NEXT: bltz a2, .LBB61_611
+; CHECK-RV64-NEXT: j .LBB61_89
; CHECK-RV64-NEXT: .LBB61_611: # %cond.load341
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12777,7 +13935,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_89
+; CHECK-RV64-NEXT: slli a2, a1, 40
+; CHECK-RV64-NEXT: bltz a2, .LBB61_612
+; CHECK-RV64-NEXT: j .LBB61_90
; CHECK-RV64-NEXT: .LBB61_612: # %cond.load345
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12790,7 +13950,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_90
+; CHECK-RV64-NEXT: slli a2, a1, 39
+; CHECK-RV64-NEXT: bltz a2, .LBB61_613
+; CHECK-RV64-NEXT: j .LBB61_91
; CHECK-RV64-NEXT: .LBB61_613: # %cond.load349
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12803,7 +13965,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_91
+; CHECK-RV64-NEXT: slli a2, a1, 38
+; CHECK-RV64-NEXT: bltz a2, .LBB61_614
+; CHECK-RV64-NEXT: j .LBB61_92
; CHECK-RV64-NEXT: .LBB61_614: # %cond.load353
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12816,7 +13980,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_92
+; CHECK-RV64-NEXT: slli a2, a1, 37
+; CHECK-RV64-NEXT: bltz a2, .LBB61_615
+; CHECK-RV64-NEXT: j .LBB61_93
; CHECK-RV64-NEXT: .LBB61_615: # %cond.load357
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12829,7 +13995,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_93
+; CHECK-RV64-NEXT: slli a2, a1, 36
+; CHECK-RV64-NEXT: bltz a2, .LBB61_616
+; CHECK-RV64-NEXT: j .LBB61_94
; CHECK-RV64-NEXT: .LBB61_616: # %cond.load361
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12842,7 +14010,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_94
+; CHECK-RV64-NEXT: slli a2, a1, 35
+; CHECK-RV64-NEXT: bltz a2, .LBB61_617
+; CHECK-RV64-NEXT: j .LBB61_95
; CHECK-RV64-NEXT: .LBB61_617: # %cond.load365
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12855,7 +14025,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_95
+; CHECK-RV64-NEXT: slli a2, a1, 34
+; CHECK-RV64-NEXT: bltz a2, .LBB61_618
+; CHECK-RV64-NEXT: j .LBB61_96
; CHECK-RV64-NEXT: .LBB61_618: # %cond.load369
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12868,7 +14040,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_96
+; CHECK-RV64-NEXT: slli a2, a1, 33
+; CHECK-RV64-NEXT: bltz a2, .LBB61_619
+; CHECK-RV64-NEXT: j .LBB61_97
; CHECK-RV64-NEXT: .LBB61_619: # %cond.load373
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12881,7 +14055,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_97
+; CHECK-RV64-NEXT: slli a2, a1, 32
+; CHECK-RV64-NEXT: bltz a2, .LBB61_620
+; CHECK-RV64-NEXT: j .LBB61_98
; CHECK-RV64-NEXT: .LBB61_620: # %cond.load377
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12894,7 +14070,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_98
+; CHECK-RV64-NEXT: slli a2, a1, 31
+; CHECK-RV64-NEXT: bltz a2, .LBB61_621
+; CHECK-RV64-NEXT: j .LBB61_99
; CHECK-RV64-NEXT: .LBB61_621: # %cond.load381
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12907,7 +14085,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_99
+; CHECK-RV64-NEXT: slli a2, a1, 30
+; CHECK-RV64-NEXT: bltz a2, .LBB61_622
+; CHECK-RV64-NEXT: j .LBB61_100
; CHECK-RV64-NEXT: .LBB61_622: # %cond.load385
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12920,7 +14100,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_100
+; CHECK-RV64-NEXT: slli a2, a1, 29
+; CHECK-RV64-NEXT: bltz a2, .LBB61_623
+; CHECK-RV64-NEXT: j .LBB61_101
; CHECK-RV64-NEXT: .LBB61_623: # %cond.load389
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12933,7 +14115,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_101
+; CHECK-RV64-NEXT: slli a2, a1, 28
+; CHECK-RV64-NEXT: bltz a2, .LBB61_624
+; CHECK-RV64-NEXT: j .LBB61_102
; CHECK-RV64-NEXT: .LBB61_624: # %cond.load393
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12946,7 +14130,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_102
+; CHECK-RV64-NEXT: slli a2, a1, 27
+; CHECK-RV64-NEXT: bltz a2, .LBB61_625
+; CHECK-RV64-NEXT: j .LBB61_103
; CHECK-RV64-NEXT: .LBB61_625: # %cond.load397
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12959,7 +14145,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_103
+; CHECK-RV64-NEXT: slli a2, a1, 26
+; CHECK-RV64-NEXT: bltz a2, .LBB61_626
+; CHECK-RV64-NEXT: j .LBB61_104
; CHECK-RV64-NEXT: .LBB61_626: # %cond.load401
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12972,7 +14160,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_104
+; CHECK-RV64-NEXT: slli a2, a1, 25
+; CHECK-RV64-NEXT: bltz a2, .LBB61_627
+; CHECK-RV64-NEXT: j .LBB61_105
; CHECK-RV64-NEXT: .LBB61_627: # %cond.load405
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12985,7 +14175,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_105
+; CHECK-RV64-NEXT: slli a2, a1, 24
+; CHECK-RV64-NEXT: bltz a2, .LBB61_628
+; CHECK-RV64-NEXT: j .LBB61_106
; CHECK-RV64-NEXT: .LBB61_628: # %cond.load409
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12998,7 +14190,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_106
+; CHECK-RV64-NEXT: slli a2, a1, 23
+; CHECK-RV64-NEXT: bltz a2, .LBB61_629
+; CHECK-RV64-NEXT: j .LBB61_107
; CHECK-RV64-NEXT: .LBB61_629: # %cond.load413
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13011,7 +14205,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_107
+; CHECK-RV64-NEXT: slli a2, a1, 22
+; CHECK-RV64-NEXT: bltz a2, .LBB61_630
+; CHECK-RV64-NEXT: j .LBB61_108
; CHECK-RV64-NEXT: .LBB61_630: # %cond.load417
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13024,7 +14220,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_108
+; CHECK-RV64-NEXT: slli a2, a1, 21
+; CHECK-RV64-NEXT: bltz a2, .LBB61_631
+; CHECK-RV64-NEXT: j .LBB61_109
; CHECK-RV64-NEXT: .LBB61_631: # %cond.load421
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13037,7 +14235,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_109
+; CHECK-RV64-NEXT: slli a2, a1, 20
+; CHECK-RV64-NEXT: bltz a2, .LBB61_632
+; CHECK-RV64-NEXT: j .LBB61_110
; CHECK-RV64-NEXT: .LBB61_632: # %cond.load425
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13050,7 +14250,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_110
+; CHECK-RV64-NEXT: slli a2, a1, 19
+; CHECK-RV64-NEXT: bltz a2, .LBB61_633
+; CHECK-RV64-NEXT: j .LBB61_111
; CHECK-RV64-NEXT: .LBB61_633: # %cond.load429
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13063,7 +14265,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_111
+; CHECK-RV64-NEXT: slli a2, a1, 18
+; CHECK-RV64-NEXT: bltz a2, .LBB61_634
+; CHECK-RV64-NEXT: j .LBB61_112
; CHECK-RV64-NEXT: .LBB61_634: # %cond.load433
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13076,7 +14280,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_112
+; CHECK-RV64-NEXT: slli a2, a1, 17
+; CHECK-RV64-NEXT: bltz a2, .LBB61_635
+; CHECK-RV64-NEXT: j .LBB61_113
; CHECK-RV64-NEXT: .LBB61_635: # %cond.load437
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13089,7 +14295,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_113
+; CHECK-RV64-NEXT: slli a2, a1, 16
+; CHECK-RV64-NEXT: bltz a2, .LBB61_636
+; CHECK-RV64-NEXT: j .LBB61_114
; CHECK-RV64-NEXT: .LBB61_636: # %cond.load441
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13102,7 +14310,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_114
+; CHECK-RV64-NEXT: slli a2, a1, 15
+; CHECK-RV64-NEXT: bltz a2, .LBB61_637
+; CHECK-RV64-NEXT: j .LBB61_115
; CHECK-RV64-NEXT: .LBB61_637: # %cond.load445
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13115,7 +14325,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_115
+; CHECK-RV64-NEXT: slli a2, a1, 14
+; CHECK-RV64-NEXT: bltz a2, .LBB61_638
+; CHECK-RV64-NEXT: j .LBB61_116
; CHECK-RV64-NEXT: .LBB61_638: # %cond.load449
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13128,7 +14340,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_116
+; CHECK-RV64-NEXT: slli a2, a1, 13
+; CHECK-RV64-NEXT: bltz a2, .LBB61_639
+; CHECK-RV64-NEXT: j .LBB61_117
; CHECK-RV64-NEXT: .LBB61_639: # %cond.load453
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13141,7 +14355,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_117
+; CHECK-RV64-NEXT: slli a2, a1, 12
+; CHECK-RV64-NEXT: bltz a2, .LBB61_640
+; CHECK-RV64-NEXT: j .LBB61_118
; CHECK-RV64-NEXT: .LBB61_640: # %cond.load457
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13154,7 +14370,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_118
+; CHECK-RV64-NEXT: slli a2, a1, 11
+; CHECK-RV64-NEXT: bltz a2, .LBB61_641
+; CHECK-RV64-NEXT: j .LBB61_119
; CHECK-RV64-NEXT: .LBB61_641: # %cond.load461
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13167,7 +14385,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_119
+; CHECK-RV64-NEXT: slli a2, a1, 10
+; CHECK-RV64-NEXT: bltz a2, .LBB61_642
+; CHECK-RV64-NEXT: j .LBB61_120
; CHECK-RV64-NEXT: .LBB61_642: # %cond.load465
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13180,7 +14400,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_120
+; CHECK-RV64-NEXT: slli a2, a1, 9
+; CHECK-RV64-NEXT: bltz a2, .LBB61_643
+; CHECK-RV64-NEXT: j .LBB61_121
; CHECK-RV64-NEXT: .LBB61_643: # %cond.load469
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13193,7 +14415,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_121
+; CHECK-RV64-NEXT: slli a2, a1, 8
+; CHECK-RV64-NEXT: bltz a2, .LBB61_644
+; CHECK-RV64-NEXT: j .LBB61_122
; CHECK-RV64-NEXT: .LBB61_644: # %cond.load473
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13206,7 +14430,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_122
+; CHECK-RV64-NEXT: slli a2, a1, 7
+; CHECK-RV64-NEXT: bltz a2, .LBB61_645
+; CHECK-RV64-NEXT: j .LBB61_123
; CHECK-RV64-NEXT: .LBB61_645: # %cond.load477
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13219,7 +14445,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_123
+; CHECK-RV64-NEXT: slli a2, a1, 6
+; CHECK-RV64-NEXT: bltz a2, .LBB61_646
+; CHECK-RV64-NEXT: j .LBB61_124
; CHECK-RV64-NEXT: .LBB61_646: # %cond.load481
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13232,7 +14460,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_124
+; CHECK-RV64-NEXT: slli a2, a1, 5
+; CHECK-RV64-NEXT: bltz a2, .LBB61_647
+; CHECK-RV64-NEXT: j .LBB61_125
; CHECK-RV64-NEXT: .LBB61_647: # %cond.load485
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13245,7 +14475,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_125
+; CHECK-RV64-NEXT: slli a2, a1, 4
+; CHECK-RV64-NEXT: bltz a2, .LBB61_648
+; CHECK-RV64-NEXT: j .LBB61_126
; CHECK-RV64-NEXT: .LBB61_648: # %cond.load489
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13258,7 +14490,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_126
+; CHECK-RV64-NEXT: slli a2, a1, 3
+; CHECK-RV64-NEXT: bltz a2, .LBB61_649
+; CHECK-RV64-NEXT: j .LBB61_127
; CHECK-RV64-NEXT: .LBB61_649: # %cond.load493
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13271,7 +14505,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_127
+; CHECK-RV64-NEXT: slli a2, a1, 2
+; CHECK-RV64-NEXT: bgez a2, .LBB61_1026
+; CHECK-RV64-NEXT: j .LBB61_128
+; CHECK-RV64-NEXT: .LBB61_1026: # %cond.load493
+; CHECK-RV64-NEXT: j .LBB61_129
; CHECK-RV64-NEXT: .LBB61_650: # %cond.load505
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vmv8r.v v16, v8
@@ -13283,7 +14521,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv2r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_132
+; CHECK-RV64-NEXT: andi a1, a2, 1
+; CHECK-RV64-NEXT: bnez a1, .LBB61_651
+; CHECK-RV64-NEXT: j .LBB61_133
; CHECK-RV64-NEXT: .LBB61_651: # %cond.load509
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13296,7 +14536,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_133
+; CHECK-RV64-NEXT: andi a1, a2, 2
+; CHECK-RV64-NEXT: bnez a1, .LBB61_652
+; CHECK-RV64-NEXT: j .LBB61_134
; CHECK-RV64-NEXT: .LBB61_652: # %cond.load513
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13309,7 +14551,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_134
+; CHECK-RV64-NEXT: andi a1, a2, 4
+; CHECK-RV64-NEXT: bnez a1, .LBB61_653
+; CHECK-RV64-NEXT: j .LBB61_135
; CHECK-RV64-NEXT: .LBB61_653: # %cond.load517
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13322,7 +14566,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_135
+; CHECK-RV64-NEXT: andi a1, a2, 8
+; CHECK-RV64-NEXT: bnez a1, .LBB61_654
+; CHECK-RV64-NEXT: j .LBB61_136
; CHECK-RV64-NEXT: .LBB61_654: # %cond.load521
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13335,7 +14581,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_136
+; CHECK-RV64-NEXT: andi a1, a2, 16
+; CHECK-RV64-NEXT: bnez a1, .LBB61_655
+; CHECK-RV64-NEXT: j .LBB61_137
; CHECK-RV64-NEXT: .LBB61_655: # %cond.load525
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13348,8 +14596,10 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_137
-; CHECK-RV64-NEXT: .LBB61_656: # %cond.load529
+; CHECK-RV64-NEXT: andi a1, a2, 32
+; CHECK-RV64-NEXT: bnez a1, .LBB61_656
+; CHECK-RV64-NEXT: j .LBB61_138
+; CHECK-RV64-NEXT: .LBB61_656: # %cond.load529
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV64-NEXT: vmv8r.v v16, v8
@@ -13361,7 +14611,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_138
+; CHECK-RV64-NEXT: andi a1, a2, 64
+; CHECK-RV64-NEXT: bnez a1, .LBB61_657
+; CHECK-RV64-NEXT: j .LBB61_139
; CHECK-RV64-NEXT: .LBB61_657: # %cond.load533
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13374,7 +14626,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_139
+; CHECK-RV64-NEXT: andi a1, a2, 128
+; CHECK-RV64-NEXT: bnez a1, .LBB61_658
+; CHECK-RV64-NEXT: j .LBB61_140
; CHECK-RV64-NEXT: .LBB61_658: # %cond.load537
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13387,7 +14641,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_140
+; CHECK-RV64-NEXT: andi a1, a2, 256
+; CHECK-RV64-NEXT: bnez a1, .LBB61_659
+; CHECK-RV64-NEXT: j .LBB61_141
; CHECK-RV64-NEXT: .LBB61_659: # %cond.load541
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13400,7 +14656,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_141
+; CHECK-RV64-NEXT: andi a1, a2, 512
+; CHECK-RV64-NEXT: bnez a1, .LBB61_660
+; CHECK-RV64-NEXT: j .LBB61_142
; CHECK-RV64-NEXT: .LBB61_660: # %cond.load545
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13413,7 +14671,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_142
+; CHECK-RV64-NEXT: andi a1, a2, 1024
+; CHECK-RV64-NEXT: bnez a1, .LBB61_661
+; CHECK-RV64-NEXT: j .LBB61_143
; CHECK-RV64-NEXT: .LBB61_661: # %cond.load549
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13426,7 +14686,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_143
+; CHECK-RV64-NEXT: slli a1, a2, 52
+; CHECK-RV64-NEXT: bltz a1, .LBB61_662
+; CHECK-RV64-NEXT: j .LBB61_144
; CHECK-RV64-NEXT: .LBB61_662: # %cond.load553
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13439,7 +14701,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_144
+; CHECK-RV64-NEXT: slli a1, a2, 51
+; CHECK-RV64-NEXT: bltz a1, .LBB61_663
+; CHECK-RV64-NEXT: j .LBB61_145
; CHECK-RV64-NEXT: .LBB61_663: # %cond.load557
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13452,7 +14716,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_145
+; CHECK-RV64-NEXT: slli a1, a2, 50
+; CHECK-RV64-NEXT: bltz a1, .LBB61_664
+; CHECK-RV64-NEXT: j .LBB61_146
; CHECK-RV64-NEXT: .LBB61_664: # %cond.load561
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13465,7 +14731,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_146
+; CHECK-RV64-NEXT: slli a1, a2, 49
+; CHECK-RV64-NEXT: bltz a1, .LBB61_665
+; CHECK-RV64-NEXT: j .LBB61_147
; CHECK-RV64-NEXT: .LBB61_665: # %cond.load565
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13478,7 +14746,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_147
+; CHECK-RV64-NEXT: slli a1, a2, 48
+; CHECK-RV64-NEXT: bltz a1, .LBB61_666
+; CHECK-RV64-NEXT: j .LBB61_148
; CHECK-RV64-NEXT: .LBB61_666: # %cond.load569
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13491,7 +14761,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_148
+; CHECK-RV64-NEXT: slli a1, a2, 47
+; CHECK-RV64-NEXT: bltz a1, .LBB61_667
+; CHECK-RV64-NEXT: j .LBB61_149
; CHECK-RV64-NEXT: .LBB61_667: # %cond.load573
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13504,7 +14776,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_149
+; CHECK-RV64-NEXT: slli a1, a2, 46
+; CHECK-RV64-NEXT: bltz a1, .LBB61_668
+; CHECK-RV64-NEXT: j .LBB61_150
; CHECK-RV64-NEXT: .LBB61_668: # %cond.load577
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13517,7 +14791,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_150
+; CHECK-RV64-NEXT: slli a1, a2, 45
+; CHECK-RV64-NEXT: bltz a1, .LBB61_669
+; CHECK-RV64-NEXT: j .LBB61_151
; CHECK-RV64-NEXT: .LBB61_669: # %cond.load581
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13530,7 +14806,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_151
+; CHECK-RV64-NEXT: slli a1, a2, 44
+; CHECK-RV64-NEXT: bltz a1, .LBB61_670
+; CHECK-RV64-NEXT: j .LBB61_152
; CHECK-RV64-NEXT: .LBB61_670: # %cond.load585
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13543,7 +14821,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_152
+; CHECK-RV64-NEXT: slli a1, a2, 43
+; CHECK-RV64-NEXT: bltz a1, .LBB61_671
+; CHECK-RV64-NEXT: j .LBB61_153
; CHECK-RV64-NEXT: .LBB61_671: # %cond.load589
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13556,7 +14836,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_153
+; CHECK-RV64-NEXT: slli a1, a2, 42
+; CHECK-RV64-NEXT: bltz a1, .LBB61_672
+; CHECK-RV64-NEXT: j .LBB61_154
; CHECK-RV64-NEXT: .LBB61_672: # %cond.load593
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13569,7 +14851,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_154
+; CHECK-RV64-NEXT: slli a1, a2, 41
+; CHECK-RV64-NEXT: bltz a1, .LBB61_673
+; CHECK-RV64-NEXT: j .LBB61_155
; CHECK-RV64-NEXT: .LBB61_673: # %cond.load597
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13582,7 +14866,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_155
+; CHECK-RV64-NEXT: slli a1, a2, 40
+; CHECK-RV64-NEXT: bltz a1, .LBB61_674
+; CHECK-RV64-NEXT: j .LBB61_156
; CHECK-RV64-NEXT: .LBB61_674: # %cond.load601
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13595,7 +14881,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_156
+; CHECK-RV64-NEXT: slli a1, a2, 39
+; CHECK-RV64-NEXT: bltz a1, .LBB61_675
+; CHECK-RV64-NEXT: j .LBB61_157
; CHECK-RV64-NEXT: .LBB61_675: # %cond.load605
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13608,7 +14896,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_157
+; CHECK-RV64-NEXT: slli a1, a2, 38
+; CHECK-RV64-NEXT: bltz a1, .LBB61_676
+; CHECK-RV64-NEXT: j .LBB61_158
; CHECK-RV64-NEXT: .LBB61_676: # %cond.load609
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13621,7 +14911,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_158
+; CHECK-RV64-NEXT: slli a1, a2, 37
+; CHECK-RV64-NEXT: bltz a1, .LBB61_677
+; CHECK-RV64-NEXT: j .LBB61_159
; CHECK-RV64-NEXT: .LBB61_677: # %cond.load613
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13634,7 +14926,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_159
+; CHECK-RV64-NEXT: slli a1, a2, 36
+; CHECK-RV64-NEXT: bltz a1, .LBB61_678
+; CHECK-RV64-NEXT: j .LBB61_160
; CHECK-RV64-NEXT: .LBB61_678: # %cond.load617
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13647,7 +14941,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_160
+; CHECK-RV64-NEXT: slli a1, a2, 35
+; CHECK-RV64-NEXT: bltz a1, .LBB61_679
+; CHECK-RV64-NEXT: j .LBB61_161
; CHECK-RV64-NEXT: .LBB61_679: # %cond.load621
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13660,7 +14956,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_161
+; CHECK-RV64-NEXT: slli a1, a2, 34
+; CHECK-RV64-NEXT: bltz a1, .LBB61_680
+; CHECK-RV64-NEXT: j .LBB61_162
; CHECK-RV64-NEXT: .LBB61_680: # %cond.load625
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13673,7 +14971,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_162
+; CHECK-RV64-NEXT: slli a1, a2, 33
+; CHECK-RV64-NEXT: bltz a1, .LBB61_681
+; CHECK-RV64-NEXT: j .LBB61_163
; CHECK-RV64-NEXT: .LBB61_681: # %cond.load629
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13686,7 +14986,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_163
+; CHECK-RV64-NEXT: slli a1, a2, 32
+; CHECK-RV64-NEXT: bltz a1, .LBB61_682
+; CHECK-RV64-NEXT: j .LBB61_164
; CHECK-RV64-NEXT: .LBB61_682: # %cond.load633
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13699,7 +15001,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_164
+; CHECK-RV64-NEXT: slli a1, a2, 31
+; CHECK-RV64-NEXT: bltz a1, .LBB61_683
+; CHECK-RV64-NEXT: j .LBB61_165
; CHECK-RV64-NEXT: .LBB61_683: # %cond.load637
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13712,7 +15016,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_165
+; CHECK-RV64-NEXT: slli a1, a2, 30
+; CHECK-RV64-NEXT: bltz a1, .LBB61_684
+; CHECK-RV64-NEXT: j .LBB61_166
; CHECK-RV64-NEXT: .LBB61_684: # %cond.load641
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13725,7 +15031,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_166
+; CHECK-RV64-NEXT: slli a1, a2, 29
+; CHECK-RV64-NEXT: bltz a1, .LBB61_685
+; CHECK-RV64-NEXT: j .LBB61_167
; CHECK-RV64-NEXT: .LBB61_685: # %cond.load645
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13738,7 +15046,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_167
+; CHECK-RV64-NEXT: slli a1, a2, 28
+; CHECK-RV64-NEXT: bltz a1, .LBB61_686
+; CHECK-RV64-NEXT: j .LBB61_168
; CHECK-RV64-NEXT: .LBB61_686: # %cond.load649
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13751,7 +15061,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_168
+; CHECK-RV64-NEXT: slli a1, a2, 27
+; CHECK-RV64-NEXT: bltz a1, .LBB61_687
+; CHECK-RV64-NEXT: j .LBB61_169
; CHECK-RV64-NEXT: .LBB61_687: # %cond.load653
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13764,7 +15076,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_169
+; CHECK-RV64-NEXT: slli a1, a2, 26
+; CHECK-RV64-NEXT: bltz a1, .LBB61_688
+; CHECK-RV64-NEXT: j .LBB61_170
; CHECK-RV64-NEXT: .LBB61_688: # %cond.load657
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13777,7 +15091,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_170
+; CHECK-RV64-NEXT: slli a1, a2, 25
+; CHECK-RV64-NEXT: bltz a1, .LBB61_689
+; CHECK-RV64-NEXT: j .LBB61_171
; CHECK-RV64-NEXT: .LBB61_689: # %cond.load661
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13790,7 +15106,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_171
+; CHECK-RV64-NEXT: slli a1, a2, 24
+; CHECK-RV64-NEXT: bltz a1, .LBB61_690
+; CHECK-RV64-NEXT: j .LBB61_172
; CHECK-RV64-NEXT: .LBB61_690: # %cond.load665
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13803,7 +15121,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_172
+; CHECK-RV64-NEXT: slli a1, a2, 23
+; CHECK-RV64-NEXT: bltz a1, .LBB61_691
+; CHECK-RV64-NEXT: j .LBB61_173
; CHECK-RV64-NEXT: .LBB61_691: # %cond.load669
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13816,7 +15136,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_173
+; CHECK-RV64-NEXT: slli a1, a2, 22
+; CHECK-RV64-NEXT: bltz a1, .LBB61_692
+; CHECK-RV64-NEXT: j .LBB61_174
; CHECK-RV64-NEXT: .LBB61_692: # %cond.load673
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13829,7 +15151,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_174
+; CHECK-RV64-NEXT: slli a1, a2, 21
+; CHECK-RV64-NEXT: bltz a1, .LBB61_693
+; CHECK-RV64-NEXT: j .LBB61_175
; CHECK-RV64-NEXT: .LBB61_693: # %cond.load677
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13842,7 +15166,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_175
+; CHECK-RV64-NEXT: slli a1, a2, 20
+; CHECK-RV64-NEXT: bltz a1, .LBB61_694
+; CHECK-RV64-NEXT: j .LBB61_176
; CHECK-RV64-NEXT: .LBB61_694: # %cond.load681
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13855,7 +15181,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_176
+; CHECK-RV64-NEXT: slli a1, a2, 19
+; CHECK-RV64-NEXT: bltz a1, .LBB61_695
+; CHECK-RV64-NEXT: j .LBB61_177
; CHECK-RV64-NEXT: .LBB61_695: # %cond.load685
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13868,7 +15196,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_177
+; CHECK-RV64-NEXT: slli a1, a2, 18
+; CHECK-RV64-NEXT: bltz a1, .LBB61_696
+; CHECK-RV64-NEXT: j .LBB61_178
; CHECK-RV64-NEXT: .LBB61_696: # %cond.load689
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13881,7 +15211,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_178
+; CHECK-RV64-NEXT: slli a1, a2, 17
+; CHECK-RV64-NEXT: bltz a1, .LBB61_697
+; CHECK-RV64-NEXT: j .LBB61_179
; CHECK-RV64-NEXT: .LBB61_697: # %cond.load693
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13894,7 +15226,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_179
+; CHECK-RV64-NEXT: slli a1, a2, 16
+; CHECK-RV64-NEXT: bltz a1, .LBB61_698
+; CHECK-RV64-NEXT: j .LBB61_180
; CHECK-RV64-NEXT: .LBB61_698: # %cond.load697
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13907,7 +15241,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_180
+; CHECK-RV64-NEXT: slli a1, a2, 15
+; CHECK-RV64-NEXT: bltz a1, .LBB61_699
+; CHECK-RV64-NEXT: j .LBB61_181
; CHECK-RV64-NEXT: .LBB61_699: # %cond.load701
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13920,7 +15256,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_181
+; CHECK-RV64-NEXT: slli a1, a2, 14
+; CHECK-RV64-NEXT: bltz a1, .LBB61_700
+; CHECK-RV64-NEXT: j .LBB61_182
; CHECK-RV64-NEXT: .LBB61_700: # %cond.load705
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13933,7 +15271,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_182
+; CHECK-RV64-NEXT: slli a1, a2, 13
+; CHECK-RV64-NEXT: bltz a1, .LBB61_701
+; CHECK-RV64-NEXT: j .LBB61_183
; CHECK-RV64-NEXT: .LBB61_701: # %cond.load709
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13946,7 +15286,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_183
+; CHECK-RV64-NEXT: slli a1, a2, 12
+; CHECK-RV64-NEXT: bltz a1, .LBB61_702
+; CHECK-RV64-NEXT: j .LBB61_184
; CHECK-RV64-NEXT: .LBB61_702: # %cond.load713
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13959,7 +15301,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_184
+; CHECK-RV64-NEXT: slli a1, a2, 11
+; CHECK-RV64-NEXT: bltz a1, .LBB61_703
+; CHECK-RV64-NEXT: j .LBB61_185
; CHECK-RV64-NEXT: .LBB61_703: # %cond.load717
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13972,7 +15316,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_185
+; CHECK-RV64-NEXT: slli a1, a2, 10
+; CHECK-RV64-NEXT: bltz a1, .LBB61_704
+; CHECK-RV64-NEXT: j .LBB61_186
; CHECK-RV64-NEXT: .LBB61_704: # %cond.load721
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13985,7 +15331,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_186
+; CHECK-RV64-NEXT: slli a1, a2, 9
+; CHECK-RV64-NEXT: bltz a1, .LBB61_705
+; CHECK-RV64-NEXT: j .LBB61_187
; CHECK-RV64-NEXT: .LBB61_705: # %cond.load725
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -13998,7 +15346,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_187
+; CHECK-RV64-NEXT: slli a1, a2, 8
+; CHECK-RV64-NEXT: bltz a1, .LBB61_706
+; CHECK-RV64-NEXT: j .LBB61_188
; CHECK-RV64-NEXT: .LBB61_706: # %cond.load729
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14011,7 +15361,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_188
+; CHECK-RV64-NEXT: slli a1, a2, 7
+; CHECK-RV64-NEXT: bltz a1, .LBB61_707
+; CHECK-RV64-NEXT: j .LBB61_189
; CHECK-RV64-NEXT: .LBB61_707: # %cond.load733
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14024,7 +15376,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_189
+; CHECK-RV64-NEXT: slli a1, a2, 6
+; CHECK-RV64-NEXT: bltz a1, .LBB61_708
+; CHECK-RV64-NEXT: j .LBB61_190
; CHECK-RV64-NEXT: .LBB61_708: # %cond.load737
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14037,7 +15391,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_190
+; CHECK-RV64-NEXT: slli a1, a2, 5
+; CHECK-RV64-NEXT: bltz a1, .LBB61_709
+; CHECK-RV64-NEXT: j .LBB61_191
; CHECK-RV64-NEXT: .LBB61_709: # %cond.load741
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14050,7 +15406,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_191
+; CHECK-RV64-NEXT: slli a1, a2, 4
+; CHECK-RV64-NEXT: bltz a1, .LBB61_710
+; CHECK-RV64-NEXT: j .LBB61_192
; CHECK-RV64-NEXT: .LBB61_710: # %cond.load745
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14063,7 +15421,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_192
+; CHECK-RV64-NEXT: slli a1, a2, 3
+; CHECK-RV64-NEXT: bltz a1, .LBB61_711
+; CHECK-RV64-NEXT: j .LBB61_193
; CHECK-RV64-NEXT: .LBB61_711: # %cond.load749
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14076,7 +15436,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_193
+; CHECK-RV64-NEXT: slli a1, a2, 2
+; CHECK-RV64-NEXT: bgez a1, .LBB61_1027
+; CHECK-RV64-NEXT: j .LBB61_194
+; CHECK-RV64-NEXT: .LBB61_1027: # %cond.load749
+; CHECK-RV64-NEXT: j .LBB61_195
; CHECK-RV64-NEXT: .LBB61_712: # %cond.load761
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vmv8r.v v16, v8
@@ -14088,7 +15452,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_198
+; CHECK-RV64-NEXT: andi a2, a1, 1
+; CHECK-RV64-NEXT: bnez a2, .LBB61_713
+; CHECK-RV64-NEXT: j .LBB61_199
; CHECK-RV64-NEXT: .LBB61_713: # %cond.load765
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14101,7 +15467,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_199
+; CHECK-RV64-NEXT: andi a2, a1, 2
+; CHECK-RV64-NEXT: bnez a2, .LBB61_714
+; CHECK-RV64-NEXT: j .LBB61_200
; CHECK-RV64-NEXT: .LBB61_714: # %cond.load769
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14114,7 +15482,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_200
+; CHECK-RV64-NEXT: andi a2, a1, 4
+; CHECK-RV64-NEXT: bnez a2, .LBB61_715
+; CHECK-RV64-NEXT: j .LBB61_201
; CHECK-RV64-NEXT: .LBB61_715: # %cond.load773
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14127,7 +15497,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_201
+; CHECK-RV64-NEXT: andi a2, a1, 8
+; CHECK-RV64-NEXT: bnez a2, .LBB61_716
+; CHECK-RV64-NEXT: j .LBB61_202
; CHECK-RV64-NEXT: .LBB61_716: # %cond.load777
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14140,7 +15512,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_202
+; CHECK-RV64-NEXT: andi a2, a1, 16
+; CHECK-RV64-NEXT: bnez a2, .LBB61_717
+; CHECK-RV64-NEXT: j .LBB61_203
; CHECK-RV64-NEXT: .LBB61_717: # %cond.load781
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14153,7 +15527,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_203
+; CHECK-RV64-NEXT: andi a2, a1, 32
+; CHECK-RV64-NEXT: bnez a2, .LBB61_718
+; CHECK-RV64-NEXT: j .LBB61_204
; CHECK-RV64-NEXT: .LBB61_718: # %cond.load785
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14166,7 +15542,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_204
+; CHECK-RV64-NEXT: andi a2, a1, 64
+; CHECK-RV64-NEXT: bnez a2, .LBB61_719
+; CHECK-RV64-NEXT: j .LBB61_205
; CHECK-RV64-NEXT: .LBB61_719: # %cond.load789
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14179,7 +15557,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_205
+; CHECK-RV64-NEXT: andi a2, a1, 128
+; CHECK-RV64-NEXT: bnez a2, .LBB61_720
+; CHECK-RV64-NEXT: j .LBB61_206
; CHECK-RV64-NEXT: .LBB61_720: # %cond.load793
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14192,7 +15572,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_206
+; CHECK-RV64-NEXT: andi a2, a1, 256
+; CHECK-RV64-NEXT: bnez a2, .LBB61_721
+; CHECK-RV64-NEXT: j .LBB61_207
; CHECK-RV64-NEXT: .LBB61_721: # %cond.load797
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14205,7 +15587,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_207
+; CHECK-RV64-NEXT: andi a2, a1, 512
+; CHECK-RV64-NEXT: bnez a2, .LBB61_722
+; CHECK-RV64-NEXT: j .LBB61_208
; CHECK-RV64-NEXT: .LBB61_722: # %cond.load801
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14218,7 +15602,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_208
+; CHECK-RV64-NEXT: andi a2, a1, 1024
+; CHECK-RV64-NEXT: bnez a2, .LBB61_723
+; CHECK-RV64-NEXT: j .LBB61_209
; CHECK-RV64-NEXT: .LBB61_723: # %cond.load805
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14231,7 +15617,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_209
+; CHECK-RV64-NEXT: slli a2, a1, 52
+; CHECK-RV64-NEXT: bltz a2, .LBB61_724
+; CHECK-RV64-NEXT: j .LBB61_210
; CHECK-RV64-NEXT: .LBB61_724: # %cond.load809
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14244,7 +15632,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_210
+; CHECK-RV64-NEXT: slli a2, a1, 51
+; CHECK-RV64-NEXT: bltz a2, .LBB61_725
+; CHECK-RV64-NEXT: j .LBB61_211
; CHECK-RV64-NEXT: .LBB61_725: # %cond.load813
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14257,7 +15647,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_211
+; CHECK-RV64-NEXT: slli a2, a1, 50
+; CHECK-RV64-NEXT: bltz a2, .LBB61_726
+; CHECK-RV64-NEXT: j .LBB61_212
; CHECK-RV64-NEXT: .LBB61_726: # %cond.load817
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14270,7 +15662,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_212
+; CHECK-RV64-NEXT: slli a2, a1, 49
+; CHECK-RV64-NEXT: bltz a2, .LBB61_727
+; CHECK-RV64-NEXT: j .LBB61_213
; CHECK-RV64-NEXT: .LBB61_727: # %cond.load821
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14283,7 +15677,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_213
+; CHECK-RV64-NEXT: slli a2, a1, 48
+; CHECK-RV64-NEXT: bltz a2, .LBB61_728
+; CHECK-RV64-NEXT: j .LBB61_214
; CHECK-RV64-NEXT: .LBB61_728: # %cond.load825
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14296,7 +15692,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_214
+; CHECK-RV64-NEXT: slli a2, a1, 47
+; CHECK-RV64-NEXT: bltz a2, .LBB61_729
+; CHECK-RV64-NEXT: j .LBB61_215
; CHECK-RV64-NEXT: .LBB61_729: # %cond.load829
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14309,7 +15707,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_215
+; CHECK-RV64-NEXT: slli a2, a1, 46
+; CHECK-RV64-NEXT: bltz a2, .LBB61_730
+; CHECK-RV64-NEXT: j .LBB61_216
; CHECK-RV64-NEXT: .LBB61_730: # %cond.load833
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14322,7 +15722,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_216
+; CHECK-RV64-NEXT: slli a2, a1, 45
+; CHECK-RV64-NEXT: bltz a2, .LBB61_731
+; CHECK-RV64-NEXT: j .LBB61_217
; CHECK-RV64-NEXT: .LBB61_731: # %cond.load837
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14335,7 +15737,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_217
+; CHECK-RV64-NEXT: slli a2, a1, 44
+; CHECK-RV64-NEXT: bltz a2, .LBB61_732
+; CHECK-RV64-NEXT: j .LBB61_218
; CHECK-RV64-NEXT: .LBB61_732: # %cond.load841
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14348,7 +15752,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_218
+; CHECK-RV64-NEXT: slli a2, a1, 43
+; CHECK-RV64-NEXT: bltz a2, .LBB61_733
+; CHECK-RV64-NEXT: j .LBB61_219
; CHECK-RV64-NEXT: .LBB61_733: # %cond.load845
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14361,7 +15767,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_219
+; CHECK-RV64-NEXT: slli a2, a1, 42
+; CHECK-RV64-NEXT: bltz a2, .LBB61_734
+; CHECK-RV64-NEXT: j .LBB61_220
; CHECK-RV64-NEXT: .LBB61_734: # %cond.load849
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14374,7 +15782,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_220
+; CHECK-RV64-NEXT: slli a2, a1, 41
+; CHECK-RV64-NEXT: bltz a2, .LBB61_735
+; CHECK-RV64-NEXT: j .LBB61_221
; CHECK-RV64-NEXT: .LBB61_735: # %cond.load853
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14387,7 +15797,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_221
+; CHECK-RV64-NEXT: slli a2, a1, 40
+; CHECK-RV64-NEXT: bltz a2, .LBB61_736
+; CHECK-RV64-NEXT: j .LBB61_222
; CHECK-RV64-NEXT: .LBB61_736: # %cond.load857
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14400,7 +15812,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_222
+; CHECK-RV64-NEXT: slli a2, a1, 39
+; CHECK-RV64-NEXT: bltz a2, .LBB61_737
+; CHECK-RV64-NEXT: j .LBB61_223
; CHECK-RV64-NEXT: .LBB61_737: # %cond.load861
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14413,7 +15827,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_223
+; CHECK-RV64-NEXT: slli a2, a1, 38
+; CHECK-RV64-NEXT: bltz a2, .LBB61_738
+; CHECK-RV64-NEXT: j .LBB61_224
; CHECK-RV64-NEXT: .LBB61_738: # %cond.load865
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14426,7 +15842,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_224
+; CHECK-RV64-NEXT: slli a2, a1, 37
+; CHECK-RV64-NEXT: bltz a2, .LBB61_739
+; CHECK-RV64-NEXT: j .LBB61_225
; CHECK-RV64-NEXT: .LBB61_739: # %cond.load869
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14439,7 +15857,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_225
+; CHECK-RV64-NEXT: slli a2, a1, 36
+; CHECK-RV64-NEXT: bltz a2, .LBB61_740
+; CHECK-RV64-NEXT: j .LBB61_226
; CHECK-RV64-NEXT: .LBB61_740: # %cond.load873
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14452,7 +15872,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_226
+; CHECK-RV64-NEXT: slli a2, a1, 35
+; CHECK-RV64-NEXT: bltz a2, .LBB61_741
+; CHECK-RV64-NEXT: j .LBB61_227
; CHECK-RV64-NEXT: .LBB61_741: # %cond.load877
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14465,7 +15887,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_227
+; CHECK-RV64-NEXT: slli a2, a1, 34
+; CHECK-RV64-NEXT: bltz a2, .LBB61_742
+; CHECK-RV64-NEXT: j .LBB61_228
; CHECK-RV64-NEXT: .LBB61_742: # %cond.load881
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14478,7 +15902,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_228
+; CHECK-RV64-NEXT: slli a2, a1, 33
+; CHECK-RV64-NEXT: bltz a2, .LBB61_743
+; CHECK-RV64-NEXT: j .LBB61_229
; CHECK-RV64-NEXT: .LBB61_743: # %cond.load885
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14491,7 +15917,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_229
+; CHECK-RV64-NEXT: slli a2, a1, 32
+; CHECK-RV64-NEXT: bltz a2, .LBB61_744
+; CHECK-RV64-NEXT: j .LBB61_230
; CHECK-RV64-NEXT: .LBB61_744: # %cond.load889
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14504,7 +15932,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_230
+; CHECK-RV64-NEXT: slli a2, a1, 31
+; CHECK-RV64-NEXT: bltz a2, .LBB61_745
+; CHECK-RV64-NEXT: j .LBB61_231
; CHECK-RV64-NEXT: .LBB61_745: # %cond.load893
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14517,7 +15947,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_231
+; CHECK-RV64-NEXT: slli a2, a1, 30
+; CHECK-RV64-NEXT: bltz a2, .LBB61_746
+; CHECK-RV64-NEXT: j .LBB61_232
; CHECK-RV64-NEXT: .LBB61_746: # %cond.load897
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14530,7 +15962,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_232
+; CHECK-RV64-NEXT: slli a2, a1, 29
+; CHECK-RV64-NEXT: bltz a2, .LBB61_747
+; CHECK-RV64-NEXT: j .LBB61_233
; CHECK-RV64-NEXT: .LBB61_747: # %cond.load901
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14543,7 +15977,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_233
+; CHECK-RV64-NEXT: slli a2, a1, 28
+; CHECK-RV64-NEXT: bltz a2, .LBB61_748
+; CHECK-RV64-NEXT: j .LBB61_234
; CHECK-RV64-NEXT: .LBB61_748: # %cond.load905
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14556,7 +15992,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_234
+; CHECK-RV64-NEXT: slli a2, a1, 27
+; CHECK-RV64-NEXT: bltz a2, .LBB61_749
+; CHECK-RV64-NEXT: j .LBB61_235
; CHECK-RV64-NEXT: .LBB61_749: # %cond.load909
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14569,7 +16007,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_235
+; CHECK-RV64-NEXT: slli a2, a1, 26
+; CHECK-RV64-NEXT: bltz a2, .LBB61_750
+; CHECK-RV64-NEXT: j .LBB61_236
; CHECK-RV64-NEXT: .LBB61_750: # %cond.load913
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14582,7 +16022,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_236
+; CHECK-RV64-NEXT: slli a2, a1, 25
+; CHECK-RV64-NEXT: bltz a2, .LBB61_751
+; CHECK-RV64-NEXT: j .LBB61_237
; CHECK-RV64-NEXT: .LBB61_751: # %cond.load917
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14595,7 +16037,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_237
+; CHECK-RV64-NEXT: slli a2, a1, 24
+; CHECK-RV64-NEXT: bltz a2, .LBB61_752
+; CHECK-RV64-NEXT: j .LBB61_238
; CHECK-RV64-NEXT: .LBB61_752: # %cond.load921
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14608,7 +16052,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_238
+; CHECK-RV64-NEXT: slli a2, a1, 23
+; CHECK-RV64-NEXT: bltz a2, .LBB61_753
+; CHECK-RV64-NEXT: j .LBB61_239
; CHECK-RV64-NEXT: .LBB61_753: # %cond.load925
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14621,7 +16067,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_239
+; CHECK-RV64-NEXT: slli a2, a1, 22
+; CHECK-RV64-NEXT: bltz a2, .LBB61_754
+; CHECK-RV64-NEXT: j .LBB61_240
; CHECK-RV64-NEXT: .LBB61_754: # %cond.load929
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14634,7 +16082,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_240
+; CHECK-RV64-NEXT: slli a2, a1, 21
+; CHECK-RV64-NEXT: bltz a2, .LBB61_755
+; CHECK-RV64-NEXT: j .LBB61_241
; CHECK-RV64-NEXT: .LBB61_755: # %cond.load933
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14647,7 +16097,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_241
+; CHECK-RV64-NEXT: slli a2, a1, 20
+; CHECK-RV64-NEXT: bltz a2, .LBB61_756
+; CHECK-RV64-NEXT: j .LBB61_242
; CHECK-RV64-NEXT: .LBB61_756: # %cond.load937
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14660,7 +16112,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_242
+; CHECK-RV64-NEXT: slli a2, a1, 19
+; CHECK-RV64-NEXT: bltz a2, .LBB61_757
+; CHECK-RV64-NEXT: j .LBB61_243
; CHECK-RV64-NEXT: .LBB61_757: # %cond.load941
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14673,7 +16127,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_243
+; CHECK-RV64-NEXT: slli a2, a1, 18
+; CHECK-RV64-NEXT: bltz a2, .LBB61_758
+; CHECK-RV64-NEXT: j .LBB61_244
; CHECK-RV64-NEXT: .LBB61_758: # %cond.load945
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14686,7 +16142,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_244
+; CHECK-RV64-NEXT: slli a2, a1, 17
+; CHECK-RV64-NEXT: bltz a2, .LBB61_759
+; CHECK-RV64-NEXT: j .LBB61_245
; CHECK-RV64-NEXT: .LBB61_759: # %cond.load949
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14699,7 +16157,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_245
+; CHECK-RV64-NEXT: slli a2, a1, 16
+; CHECK-RV64-NEXT: bltz a2, .LBB61_760
+; CHECK-RV64-NEXT: j .LBB61_246
; CHECK-RV64-NEXT: .LBB61_760: # %cond.load953
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14712,7 +16172,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_246
+; CHECK-RV64-NEXT: slli a2, a1, 15
+; CHECK-RV64-NEXT: bltz a2, .LBB61_761
+; CHECK-RV64-NEXT: j .LBB61_247
; CHECK-RV64-NEXT: .LBB61_761: # %cond.load957
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14725,7 +16187,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_247
+; CHECK-RV64-NEXT: slli a2, a1, 14
+; CHECK-RV64-NEXT: bltz a2, .LBB61_762
+; CHECK-RV64-NEXT: j .LBB61_248
; CHECK-RV64-NEXT: .LBB61_762: # %cond.load961
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14738,7 +16202,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_248
+; CHECK-RV64-NEXT: slli a2, a1, 13
+; CHECK-RV64-NEXT: bltz a2, .LBB61_763
+; CHECK-RV64-NEXT: j .LBB61_249
; CHECK-RV64-NEXT: .LBB61_763: # %cond.load965
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14751,7 +16217,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_249
+; CHECK-RV64-NEXT: slli a2, a1, 12
+; CHECK-RV64-NEXT: bltz a2, .LBB61_764
+; CHECK-RV64-NEXT: j .LBB61_250
; CHECK-RV64-NEXT: .LBB61_764: # %cond.load969
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14764,7 +16232,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_250
+; CHECK-RV64-NEXT: slli a2, a1, 11
+; CHECK-RV64-NEXT: bltz a2, .LBB61_765
+; CHECK-RV64-NEXT: j .LBB61_251
; CHECK-RV64-NEXT: .LBB61_765: # %cond.load973
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14777,7 +16247,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_251
+; CHECK-RV64-NEXT: slli a2, a1, 10
+; CHECK-RV64-NEXT: bltz a2, .LBB61_766
+; CHECK-RV64-NEXT: j .LBB61_252
; CHECK-RV64-NEXT: .LBB61_766: # %cond.load977
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14790,7 +16262,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_252
+; CHECK-RV64-NEXT: slli a2, a1, 9
+; CHECK-RV64-NEXT: bltz a2, .LBB61_767
+; CHECK-RV64-NEXT: j .LBB61_253
; CHECK-RV64-NEXT: .LBB61_767: # %cond.load981
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14803,7 +16277,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_253
+; CHECK-RV64-NEXT: slli a2, a1, 8
+; CHECK-RV64-NEXT: bltz a2, .LBB61_768
+; CHECK-RV64-NEXT: j .LBB61_254
; CHECK-RV64-NEXT: .LBB61_768: # %cond.load985
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14816,7 +16292,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_254
+; CHECK-RV64-NEXT: slli a2, a1, 7
+; CHECK-RV64-NEXT: bltz a2, .LBB61_769
+; CHECK-RV64-NEXT: j .LBB61_255
; CHECK-RV64-NEXT: .LBB61_769: # %cond.load989
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14829,7 +16307,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_255
+; CHECK-RV64-NEXT: slli a2, a1, 6
+; CHECK-RV64-NEXT: bltz a2, .LBB61_770
+; CHECK-RV64-NEXT: j .LBB61_256
; CHECK-RV64-NEXT: .LBB61_770: # %cond.load993
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14842,7 +16322,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_256
+; CHECK-RV64-NEXT: slli a2, a1, 5
+; CHECK-RV64-NEXT: bltz a2, .LBB61_771
+; CHECK-RV64-NEXT: j .LBB61_257
; CHECK-RV64-NEXT: .LBB61_771: # %cond.load997
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14855,7 +16337,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_257
+; CHECK-RV64-NEXT: slli a2, a1, 4
+; CHECK-RV64-NEXT: bltz a2, .LBB61_772
+; CHECK-RV64-NEXT: j .LBB61_258
; CHECK-RV64-NEXT: .LBB61_772: # %cond.load1001
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14868,7 +16352,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_258
+; CHECK-RV64-NEXT: slli a2, a1, 3
+; CHECK-RV64-NEXT: bltz a2, .LBB61_773
+; CHECK-RV64-NEXT: j .LBB61_259
; CHECK-RV64-NEXT: .LBB61_773: # %cond.load1005
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -14881,7 +16367,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_259
+; CHECK-RV64-NEXT: slli a2, a1, 2
+; CHECK-RV64-NEXT: bgez a2, .LBB61_1028
+; CHECK-RV64-NEXT: j .LBB61_260
+; CHECK-RV64-NEXT: .LBB61_1028: # %cond.load1005
+; CHECK-RV64-NEXT: j .LBB61_261
; CHECK-RV64-NEXT: .LBB61_774: # %cond.load1017
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vmv8r.v v16, v8
@@ -14893,7 +16383,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: addi a0, a0, 1
; CHECK-RV64-NEXT: vmv4r.v v16, v8
; CHECK-RV64-NEXT: vmv8r.v v8, v16
-; CHECK-RV64-NEXT: j .LBB61_264
+; CHECK-RV64-NEXT: andi a1, a2, 1
+; CHECK-RV64-NEXT: bnez a1, .LBB61_775
+; CHECK-RV64-NEXT: j .LBB61_265
; CHECK-RV64-NEXT: .LBB61_775: # %cond.load1021
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -14904,7 +16396,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_265
+; CHECK-RV64-NEXT: andi a1, a2, 2
+; CHECK-RV64-NEXT: bnez a1, .LBB61_776
+; CHECK-RV64-NEXT: j .LBB61_266
; CHECK-RV64-NEXT: .LBB61_776: # %cond.load1025
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -14915,7 +16409,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_266
+; CHECK-RV64-NEXT: andi a1, a2, 4
+; CHECK-RV64-NEXT: bnez a1, .LBB61_777
+; CHECK-RV64-NEXT: j .LBB61_267
; CHECK-RV64-NEXT: .LBB61_777: # %cond.load1029
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -14926,7 +16422,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_267
+; CHECK-RV64-NEXT: andi a1, a2, 8
+; CHECK-RV64-NEXT: bnez a1, .LBB61_778
+; CHECK-RV64-NEXT: j .LBB61_268
; CHECK-RV64-NEXT: .LBB61_778: # %cond.load1033
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -14937,7 +16435,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_268
+; CHECK-RV64-NEXT: andi a1, a2, 16
+; CHECK-RV64-NEXT: bnez a1, .LBB61_779
+; CHECK-RV64-NEXT: j .LBB61_269
; CHECK-RV64-NEXT: .LBB61_779: # %cond.load1037
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -14948,7 +16448,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_269
+; CHECK-RV64-NEXT: andi a1, a2, 32
+; CHECK-RV64-NEXT: bnez a1, .LBB61_780
+; CHECK-RV64-NEXT: j .LBB61_270
; CHECK-RV64-NEXT: .LBB61_780: # %cond.load1041
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -14959,7 +16461,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_270
+; CHECK-RV64-NEXT: andi a1, a2, 64
+; CHECK-RV64-NEXT: bnez a1, .LBB61_781
+; CHECK-RV64-NEXT: j .LBB61_271
; CHECK-RV64-NEXT: .LBB61_781: # %cond.load1045
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -14970,7 +16474,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_271
+; CHECK-RV64-NEXT: andi a1, a2, 128
+; CHECK-RV64-NEXT: bnez a1, .LBB61_782
+; CHECK-RV64-NEXT: j .LBB61_272
; CHECK-RV64-NEXT: .LBB61_782: # %cond.load1049
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -14981,7 +16487,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_272
+; CHECK-RV64-NEXT: andi a1, a2, 256
+; CHECK-RV64-NEXT: bnez a1, .LBB61_783
+; CHECK-RV64-NEXT: j .LBB61_273
; CHECK-RV64-NEXT: .LBB61_783: # %cond.load1053
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -14992,7 +16500,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_273
+; CHECK-RV64-NEXT: andi a1, a2, 512
+; CHECK-RV64-NEXT: bnez a1, .LBB61_784
+; CHECK-RV64-NEXT: j .LBB61_274
; CHECK-RV64-NEXT: .LBB61_784: # %cond.load1057
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15003,7 +16513,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_274
+; CHECK-RV64-NEXT: andi a1, a2, 1024
+; CHECK-RV64-NEXT: bnez a1, .LBB61_785
+; CHECK-RV64-NEXT: j .LBB61_275
; CHECK-RV64-NEXT: .LBB61_785: # %cond.load1061
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15014,7 +16526,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_275
+; CHECK-RV64-NEXT: slli a1, a2, 52
+; CHECK-RV64-NEXT: bltz a1, .LBB61_786
+; CHECK-RV64-NEXT: j .LBB61_276
; CHECK-RV64-NEXT: .LBB61_786: # %cond.load1065
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15025,7 +16539,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_276
+; CHECK-RV64-NEXT: slli a1, a2, 51
+; CHECK-RV64-NEXT: bltz a1, .LBB61_787
+; CHECK-RV64-NEXT: j .LBB61_277
; CHECK-RV64-NEXT: .LBB61_787: # %cond.load1069
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15036,7 +16552,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_277
+; CHECK-RV64-NEXT: slli a1, a2, 50
+; CHECK-RV64-NEXT: bltz a1, .LBB61_788
+; CHECK-RV64-NEXT: j .LBB61_278
; CHECK-RV64-NEXT: .LBB61_788: # %cond.load1073
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15047,7 +16565,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_278
+; CHECK-RV64-NEXT: slli a1, a2, 49
+; CHECK-RV64-NEXT: bltz a1, .LBB61_789
+; CHECK-RV64-NEXT: j .LBB61_279
; CHECK-RV64-NEXT: .LBB61_789: # %cond.load1077
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15058,7 +16578,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_279
+; CHECK-RV64-NEXT: slli a1, a2, 48
+; CHECK-RV64-NEXT: bltz a1, .LBB61_790
+; CHECK-RV64-NEXT: j .LBB61_280
; CHECK-RV64-NEXT: .LBB61_790: # %cond.load1081
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15069,7 +16591,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_280
+; CHECK-RV64-NEXT: slli a1, a2, 47
+; CHECK-RV64-NEXT: bltz a1, .LBB61_791
+; CHECK-RV64-NEXT: j .LBB61_281
; CHECK-RV64-NEXT: .LBB61_791: # %cond.load1085
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15080,7 +16604,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_281
+; CHECK-RV64-NEXT: slli a1, a2, 46
+; CHECK-RV64-NEXT: bltz a1, .LBB61_792
+; CHECK-RV64-NEXT: j .LBB61_282
; CHECK-RV64-NEXT: .LBB61_792: # %cond.load1089
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15091,7 +16617,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_282
+; CHECK-RV64-NEXT: slli a1, a2, 45
+; CHECK-RV64-NEXT: bltz a1, .LBB61_793
+; CHECK-RV64-NEXT: j .LBB61_283
; CHECK-RV64-NEXT: .LBB61_793: # %cond.load1093
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15102,7 +16630,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_283
+; CHECK-RV64-NEXT: slli a1, a2, 44
+; CHECK-RV64-NEXT: bltz a1, .LBB61_794
+; CHECK-RV64-NEXT: j .LBB61_284
; CHECK-RV64-NEXT: .LBB61_794: # %cond.load1097
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15113,7 +16643,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_284
+; CHECK-RV64-NEXT: slli a1, a2, 43
+; CHECK-RV64-NEXT: bltz a1, .LBB61_795
+; CHECK-RV64-NEXT: j .LBB61_285
; CHECK-RV64-NEXT: .LBB61_795: # %cond.load1101
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15124,7 +16656,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_285
+; CHECK-RV64-NEXT: slli a1, a2, 42
+; CHECK-RV64-NEXT: bltz a1, .LBB61_796
+; CHECK-RV64-NEXT: j .LBB61_286
; CHECK-RV64-NEXT: .LBB61_796: # %cond.load1105
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15135,7 +16669,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_286
+; CHECK-RV64-NEXT: slli a1, a2, 41
+; CHECK-RV64-NEXT: bltz a1, .LBB61_797
+; CHECK-RV64-NEXT: j .LBB61_287
; CHECK-RV64-NEXT: .LBB61_797: # %cond.load1109
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15146,7 +16682,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_287
+; CHECK-RV64-NEXT: slli a1, a2, 40
+; CHECK-RV64-NEXT: bltz a1, .LBB61_798
+; CHECK-RV64-NEXT: j .LBB61_288
; CHECK-RV64-NEXT: .LBB61_798: # %cond.load1113
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15157,7 +16695,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_288
+; CHECK-RV64-NEXT: slli a1, a2, 39
+; CHECK-RV64-NEXT: bltz a1, .LBB61_799
+; CHECK-RV64-NEXT: j .LBB61_289
; CHECK-RV64-NEXT: .LBB61_799: # %cond.load1117
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15168,7 +16708,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_289
+; CHECK-RV64-NEXT: slli a1, a2, 38
+; CHECK-RV64-NEXT: bltz a1, .LBB61_800
+; CHECK-RV64-NEXT: j .LBB61_290
; CHECK-RV64-NEXT: .LBB61_800: # %cond.load1121
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15179,7 +16721,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_290
+; CHECK-RV64-NEXT: slli a1, a2, 37
+; CHECK-RV64-NEXT: bltz a1, .LBB61_801
+; CHECK-RV64-NEXT: j .LBB61_291
; CHECK-RV64-NEXT: .LBB61_801: # %cond.load1125
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15190,7 +16734,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_291
+; CHECK-RV64-NEXT: slli a1, a2, 36
+; CHECK-RV64-NEXT: bltz a1, .LBB61_802
+; CHECK-RV64-NEXT: j .LBB61_292
; CHECK-RV64-NEXT: .LBB61_802: # %cond.load1129
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15201,7 +16747,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_292
+; CHECK-RV64-NEXT: slli a1, a2, 35
+; CHECK-RV64-NEXT: bltz a1, .LBB61_803
+; CHECK-RV64-NEXT: j .LBB61_293
; CHECK-RV64-NEXT: .LBB61_803: # %cond.load1133
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15212,7 +16760,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_293
+; CHECK-RV64-NEXT: slli a1, a2, 34
+; CHECK-RV64-NEXT: bltz a1, .LBB61_804
+; CHECK-RV64-NEXT: j .LBB61_294
; CHECK-RV64-NEXT: .LBB61_804: # %cond.load1137
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15223,7 +16773,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_294
+; CHECK-RV64-NEXT: slli a1, a2, 33
+; CHECK-RV64-NEXT: bltz a1, .LBB61_805
+; CHECK-RV64-NEXT: j .LBB61_295
; CHECK-RV64-NEXT: .LBB61_805: # %cond.load1141
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15234,7 +16786,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_295
+; CHECK-RV64-NEXT: slli a1, a2, 32
+; CHECK-RV64-NEXT: bltz a1, .LBB61_806
+; CHECK-RV64-NEXT: j .LBB61_296
; CHECK-RV64-NEXT: .LBB61_806: # %cond.load1145
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15245,7 +16799,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_296
+; CHECK-RV64-NEXT: slli a1, a2, 31
+; CHECK-RV64-NEXT: bltz a1, .LBB61_807
+; CHECK-RV64-NEXT: j .LBB61_297
; CHECK-RV64-NEXT: .LBB61_807: # %cond.load1149
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15256,7 +16812,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_297
+; CHECK-RV64-NEXT: slli a1, a2, 30
+; CHECK-RV64-NEXT: bltz a1, .LBB61_808
+; CHECK-RV64-NEXT: j .LBB61_298
; CHECK-RV64-NEXT: .LBB61_808: # %cond.load1153
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15267,7 +16825,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_298
+; CHECK-RV64-NEXT: slli a1, a2, 29
+; CHECK-RV64-NEXT: bltz a1, .LBB61_809
+; CHECK-RV64-NEXT: j .LBB61_299
; CHECK-RV64-NEXT: .LBB61_809: # %cond.load1157
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15278,7 +16838,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_299
+; CHECK-RV64-NEXT: slli a1, a2, 28
+; CHECK-RV64-NEXT: bltz a1, .LBB61_810
+; CHECK-RV64-NEXT: j .LBB61_300
; CHECK-RV64-NEXT: .LBB61_810: # %cond.load1161
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15289,7 +16851,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_300
+; CHECK-RV64-NEXT: slli a1, a2, 27
+; CHECK-RV64-NEXT: bltz a1, .LBB61_811
+; CHECK-RV64-NEXT: j .LBB61_301
; CHECK-RV64-NEXT: .LBB61_811: # %cond.load1165
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15300,7 +16864,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_301
+; CHECK-RV64-NEXT: slli a1, a2, 26
+; CHECK-RV64-NEXT: bltz a1, .LBB61_812
+; CHECK-RV64-NEXT: j .LBB61_302
; CHECK-RV64-NEXT: .LBB61_812: # %cond.load1169
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15311,7 +16877,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_302
+; CHECK-RV64-NEXT: slli a1, a2, 25
+; CHECK-RV64-NEXT: bltz a1, .LBB61_813
+; CHECK-RV64-NEXT: j .LBB61_303
; CHECK-RV64-NEXT: .LBB61_813: # %cond.load1173
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15322,7 +16890,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_303
+; CHECK-RV64-NEXT: slli a1, a2, 24
+; CHECK-RV64-NEXT: bltz a1, .LBB61_814
+; CHECK-RV64-NEXT: j .LBB61_304
; CHECK-RV64-NEXT: .LBB61_814: # %cond.load1177
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15333,7 +16903,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_304
+; CHECK-RV64-NEXT: slli a1, a2, 23
+; CHECK-RV64-NEXT: bltz a1, .LBB61_815
+; CHECK-RV64-NEXT: j .LBB61_305
; CHECK-RV64-NEXT: .LBB61_815: # %cond.load1181
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15344,7 +16916,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_305
+; CHECK-RV64-NEXT: slli a1, a2, 22
+; CHECK-RV64-NEXT: bltz a1, .LBB61_816
+; CHECK-RV64-NEXT: j .LBB61_306
; CHECK-RV64-NEXT: .LBB61_816: # %cond.load1185
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15355,7 +16929,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_306
+; CHECK-RV64-NEXT: slli a1, a2, 21
+; CHECK-RV64-NEXT: bltz a1, .LBB61_817
+; CHECK-RV64-NEXT: j .LBB61_307
; CHECK-RV64-NEXT: .LBB61_817: # %cond.load1189
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15366,7 +16942,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_307
+; CHECK-RV64-NEXT: slli a1, a2, 20
+; CHECK-RV64-NEXT: bltz a1, .LBB61_818
+; CHECK-RV64-NEXT: j .LBB61_308
; CHECK-RV64-NEXT: .LBB61_818: # %cond.load1193
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15377,7 +16955,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_308
+; CHECK-RV64-NEXT: slli a1, a2, 19
+; CHECK-RV64-NEXT: bltz a1, .LBB61_819
+; CHECK-RV64-NEXT: j .LBB61_309
; CHECK-RV64-NEXT: .LBB61_819: # %cond.load1197
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15388,7 +16968,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_309
+; CHECK-RV64-NEXT: slli a1, a2, 18
+; CHECK-RV64-NEXT: bltz a1, .LBB61_820
+; CHECK-RV64-NEXT: j .LBB61_310
; CHECK-RV64-NEXT: .LBB61_820: # %cond.load1201
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15399,7 +16981,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_310
+; CHECK-RV64-NEXT: slli a1, a2, 17
+; CHECK-RV64-NEXT: bltz a1, .LBB61_821
+; CHECK-RV64-NEXT: j .LBB61_311
; CHECK-RV64-NEXT: .LBB61_821: # %cond.load1205
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15410,7 +16994,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_311
+; CHECK-RV64-NEXT: slli a1, a2, 16
+; CHECK-RV64-NEXT: bltz a1, .LBB61_822
+; CHECK-RV64-NEXT: j .LBB61_312
; CHECK-RV64-NEXT: .LBB61_822: # %cond.load1209
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15421,7 +17007,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_312
+; CHECK-RV64-NEXT: slli a1, a2, 15
+; CHECK-RV64-NEXT: bltz a1, .LBB61_823
+; CHECK-RV64-NEXT: j .LBB61_313
; CHECK-RV64-NEXT: .LBB61_823: # %cond.load1213
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15432,7 +17020,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_313
+; CHECK-RV64-NEXT: slli a1, a2, 14
+; CHECK-RV64-NEXT: bltz a1, .LBB61_824
+; CHECK-RV64-NEXT: j .LBB61_314
; CHECK-RV64-NEXT: .LBB61_824: # %cond.load1217
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15443,7 +17033,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_314
+; CHECK-RV64-NEXT: slli a1, a2, 13
+; CHECK-RV64-NEXT: bltz a1, .LBB61_825
+; CHECK-RV64-NEXT: j .LBB61_315
; CHECK-RV64-NEXT: .LBB61_825: # %cond.load1221
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15454,7 +17046,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_315
+; CHECK-RV64-NEXT: slli a1, a2, 12
+; CHECK-RV64-NEXT: bltz a1, .LBB61_826
+; CHECK-RV64-NEXT: j .LBB61_316
; CHECK-RV64-NEXT: .LBB61_826: # %cond.load1225
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15465,7 +17059,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_316
+; CHECK-RV64-NEXT: slli a1, a2, 11
+; CHECK-RV64-NEXT: bltz a1, .LBB61_827
+; CHECK-RV64-NEXT: j .LBB61_317
; CHECK-RV64-NEXT: .LBB61_827: # %cond.load1229
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15476,7 +17072,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_317
+; CHECK-RV64-NEXT: slli a1, a2, 10
+; CHECK-RV64-NEXT: bltz a1, .LBB61_828
+; CHECK-RV64-NEXT: j .LBB61_318
; CHECK-RV64-NEXT: .LBB61_828: # %cond.load1233
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15487,7 +17085,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_318
+; CHECK-RV64-NEXT: slli a1, a2, 9
+; CHECK-RV64-NEXT: bltz a1, .LBB61_829
+; CHECK-RV64-NEXT: j .LBB61_319
; CHECK-RV64-NEXT: .LBB61_829: # %cond.load1237
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15498,7 +17098,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_319
+; CHECK-RV64-NEXT: slli a1, a2, 8
+; CHECK-RV64-NEXT: bltz a1, .LBB61_830
+; CHECK-RV64-NEXT: j .LBB61_320
; CHECK-RV64-NEXT: .LBB61_830: # %cond.load1241
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15509,7 +17111,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_320
+; CHECK-RV64-NEXT: slli a1, a2, 7
+; CHECK-RV64-NEXT: bltz a1, .LBB61_831
+; CHECK-RV64-NEXT: j .LBB61_321
; CHECK-RV64-NEXT: .LBB61_831: # %cond.load1245
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15520,7 +17124,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_321
+; CHECK-RV64-NEXT: slli a1, a2, 6
+; CHECK-RV64-NEXT: bltz a1, .LBB61_832
+; CHECK-RV64-NEXT: j .LBB61_322
; CHECK-RV64-NEXT: .LBB61_832: # %cond.load1249
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15531,7 +17137,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_322
+; CHECK-RV64-NEXT: slli a1, a2, 5
+; CHECK-RV64-NEXT: bltz a1, .LBB61_833
+; CHECK-RV64-NEXT: j .LBB61_323
; CHECK-RV64-NEXT: .LBB61_833: # %cond.load1253
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15542,7 +17150,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_323
+; CHECK-RV64-NEXT: slli a1, a2, 4
+; CHECK-RV64-NEXT: bltz a1, .LBB61_834
+; CHECK-RV64-NEXT: j .LBB61_324
; CHECK-RV64-NEXT: .LBB61_834: # %cond.load1257
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15553,7 +17163,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_324
+; CHECK-RV64-NEXT: slli a1, a2, 3
+; CHECK-RV64-NEXT: bltz a1, .LBB61_835
+; CHECK-RV64-NEXT: j .LBB61_325
; CHECK-RV64-NEXT: .LBB61_835: # %cond.load1261
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15564,7 +17176,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_325
+; CHECK-RV64-NEXT: slli a1, a2, 2
+; CHECK-RV64-NEXT: bgez a1, .LBB61_1029
+; CHECK-RV64-NEXT: j .LBB61_326
+; CHECK-RV64-NEXT: .LBB61_1029: # %cond.load1261
+; CHECK-RV64-NEXT: j .LBB61_327
; CHECK-RV64-NEXT: .LBB61_836: # %cond.load1273
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vmv.s.x v16, a2
@@ -15573,7 +17189,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_330
+; CHECK-RV64-NEXT: andi a2, a1, 1
+; CHECK-RV64-NEXT: bnez a2, .LBB61_837
+; CHECK-RV64-NEXT: j .LBB61_331
; CHECK-RV64-NEXT: .LBB61_837: # %cond.load1277
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15584,7 +17202,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_331
+; CHECK-RV64-NEXT: andi a2, a1, 2
+; CHECK-RV64-NEXT: bnez a2, .LBB61_838
+; CHECK-RV64-NEXT: j .LBB61_332
; CHECK-RV64-NEXT: .LBB61_838: # %cond.load1281
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15595,7 +17215,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_332
+; CHECK-RV64-NEXT: andi a2, a1, 4
+; CHECK-RV64-NEXT: bnez a2, .LBB61_839
+; CHECK-RV64-NEXT: j .LBB61_333
; CHECK-RV64-NEXT: .LBB61_839: # %cond.load1285
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15606,7 +17228,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_333
+; CHECK-RV64-NEXT: andi a2, a1, 8
+; CHECK-RV64-NEXT: bnez a2, .LBB61_840
+; CHECK-RV64-NEXT: j .LBB61_334
; CHECK-RV64-NEXT: .LBB61_840: # %cond.load1289
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15617,7 +17241,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_334
+; CHECK-RV64-NEXT: andi a2, a1, 16
+; CHECK-RV64-NEXT: bnez a2, .LBB61_841
+; CHECK-RV64-NEXT: j .LBB61_335
; CHECK-RV64-NEXT: .LBB61_841: # %cond.load1293
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15628,7 +17254,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_335
+; CHECK-RV64-NEXT: andi a2, a1, 32
+; CHECK-RV64-NEXT: bnez a2, .LBB61_842
+; CHECK-RV64-NEXT: j .LBB61_336
; CHECK-RV64-NEXT: .LBB61_842: # %cond.load1297
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15639,7 +17267,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_336
+; CHECK-RV64-NEXT: andi a2, a1, 64
+; CHECK-RV64-NEXT: bnez a2, .LBB61_843
+; CHECK-RV64-NEXT: j .LBB61_337
; CHECK-RV64-NEXT: .LBB61_843: # %cond.load1301
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15650,7 +17280,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_337
+; CHECK-RV64-NEXT: andi a2, a1, 128
+; CHECK-RV64-NEXT: bnez a2, .LBB61_844
+; CHECK-RV64-NEXT: j .LBB61_338
; CHECK-RV64-NEXT: .LBB61_844: # %cond.load1305
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15661,7 +17293,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_338
+; CHECK-RV64-NEXT: andi a2, a1, 256
+; CHECK-RV64-NEXT: bnez a2, .LBB61_845
+; CHECK-RV64-NEXT: j .LBB61_339
; CHECK-RV64-NEXT: .LBB61_845: # %cond.load1309
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15672,7 +17306,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_339
+; CHECK-RV64-NEXT: andi a2, a1, 512
+; CHECK-RV64-NEXT: bnez a2, .LBB61_846
+; CHECK-RV64-NEXT: j .LBB61_340
; CHECK-RV64-NEXT: .LBB61_846: # %cond.load1313
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15683,7 +17319,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_340
+; CHECK-RV64-NEXT: andi a2, a1, 1024
+; CHECK-RV64-NEXT: bnez a2, .LBB61_847
+; CHECK-RV64-NEXT: j .LBB61_341
; CHECK-RV64-NEXT: .LBB61_847: # %cond.load1317
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15694,7 +17332,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_341
+; CHECK-RV64-NEXT: slli a2, a1, 52
+; CHECK-RV64-NEXT: bltz a2, .LBB61_848
+; CHECK-RV64-NEXT: j .LBB61_342
; CHECK-RV64-NEXT: .LBB61_848: # %cond.load1321
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15705,7 +17345,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_342
+; CHECK-RV64-NEXT: slli a2, a1, 51
+; CHECK-RV64-NEXT: bltz a2, .LBB61_849
+; CHECK-RV64-NEXT: j .LBB61_343
; CHECK-RV64-NEXT: .LBB61_849: # %cond.load1325
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15716,7 +17358,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_343
+; CHECK-RV64-NEXT: slli a2, a1, 50
+; CHECK-RV64-NEXT: bltz a2, .LBB61_850
+; CHECK-RV64-NEXT: j .LBB61_344
; CHECK-RV64-NEXT: .LBB61_850: # %cond.load1329
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15727,7 +17371,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_344
+; CHECK-RV64-NEXT: slli a2, a1, 49
+; CHECK-RV64-NEXT: bltz a2, .LBB61_851
+; CHECK-RV64-NEXT: j .LBB61_345
; CHECK-RV64-NEXT: .LBB61_851: # %cond.load1333
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15738,7 +17384,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_345
+; CHECK-RV64-NEXT: slli a2, a1, 48
+; CHECK-RV64-NEXT: bltz a2, .LBB61_852
+; CHECK-RV64-NEXT: j .LBB61_346
; CHECK-RV64-NEXT: .LBB61_852: # %cond.load1337
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15749,7 +17397,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_346
+; CHECK-RV64-NEXT: slli a2, a1, 47
+; CHECK-RV64-NEXT: bltz a2, .LBB61_853
+; CHECK-RV64-NEXT: j .LBB61_347
; CHECK-RV64-NEXT: .LBB61_853: # %cond.load1341
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15760,7 +17410,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_347
+; CHECK-RV64-NEXT: slli a2, a1, 46
+; CHECK-RV64-NEXT: bltz a2, .LBB61_854
+; CHECK-RV64-NEXT: j .LBB61_348
; CHECK-RV64-NEXT: .LBB61_854: # %cond.load1345
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15771,7 +17423,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_348
+; CHECK-RV64-NEXT: slli a2, a1, 45
+; CHECK-RV64-NEXT: bltz a2, .LBB61_855
+; CHECK-RV64-NEXT: j .LBB61_349
; CHECK-RV64-NEXT: .LBB61_855: # %cond.load1349
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15782,7 +17436,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_349
+; CHECK-RV64-NEXT: slli a2, a1, 44
+; CHECK-RV64-NEXT: bltz a2, .LBB61_856
+; CHECK-RV64-NEXT: j .LBB61_350
; CHECK-RV64-NEXT: .LBB61_856: # %cond.load1353
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15793,7 +17449,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_350
+; CHECK-RV64-NEXT: slli a2, a1, 43
+; CHECK-RV64-NEXT: bltz a2, .LBB61_857
+; CHECK-RV64-NEXT: j .LBB61_351
; CHECK-RV64-NEXT: .LBB61_857: # %cond.load1357
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15804,7 +17462,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_351
+; CHECK-RV64-NEXT: slli a2, a1, 42
+; CHECK-RV64-NEXT: bltz a2, .LBB61_858
+; CHECK-RV64-NEXT: j .LBB61_352
; CHECK-RV64-NEXT: .LBB61_858: # %cond.load1361
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15815,7 +17475,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_352
+; CHECK-RV64-NEXT: slli a2, a1, 41
+; CHECK-RV64-NEXT: bltz a2, .LBB61_859
+; CHECK-RV64-NEXT: j .LBB61_353
; CHECK-RV64-NEXT: .LBB61_859: # %cond.load1365
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15826,7 +17488,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_353
+; CHECK-RV64-NEXT: slli a2, a1, 40
+; CHECK-RV64-NEXT: bltz a2, .LBB61_860
+; CHECK-RV64-NEXT: j .LBB61_354
; CHECK-RV64-NEXT: .LBB61_860: # %cond.load1369
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15837,7 +17501,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_354
+; CHECK-RV64-NEXT: slli a2, a1, 39
+; CHECK-RV64-NEXT: bltz a2, .LBB61_861
+; CHECK-RV64-NEXT: j .LBB61_355
; CHECK-RV64-NEXT: .LBB61_861: # %cond.load1373
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15848,7 +17514,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_355
+; CHECK-RV64-NEXT: slli a2, a1, 38
+; CHECK-RV64-NEXT: bltz a2, .LBB61_862
+; CHECK-RV64-NEXT: j .LBB61_356
; CHECK-RV64-NEXT: .LBB61_862: # %cond.load1377
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15859,7 +17527,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_356
+; CHECK-RV64-NEXT: slli a2, a1, 37
+; CHECK-RV64-NEXT: bltz a2, .LBB61_863
+; CHECK-RV64-NEXT: j .LBB61_357
; CHECK-RV64-NEXT: .LBB61_863: # %cond.load1381
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15870,7 +17540,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_357
+; CHECK-RV64-NEXT: slli a2, a1, 36
+; CHECK-RV64-NEXT: bltz a2, .LBB61_864
+; CHECK-RV64-NEXT: j .LBB61_358
; CHECK-RV64-NEXT: .LBB61_864: # %cond.load1385
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15881,7 +17553,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_358
+; CHECK-RV64-NEXT: slli a2, a1, 35
+; CHECK-RV64-NEXT: bltz a2, .LBB61_865
+; CHECK-RV64-NEXT: j .LBB61_359
; CHECK-RV64-NEXT: .LBB61_865: # %cond.load1389
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15892,7 +17566,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_359
+; CHECK-RV64-NEXT: slli a2, a1, 34
+; CHECK-RV64-NEXT: bltz a2, .LBB61_866
+; CHECK-RV64-NEXT: j .LBB61_360
; CHECK-RV64-NEXT: .LBB61_866: # %cond.load1393
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15903,7 +17579,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_360
+; CHECK-RV64-NEXT: slli a2, a1, 33
+; CHECK-RV64-NEXT: bltz a2, .LBB61_867
+; CHECK-RV64-NEXT: j .LBB61_361
; CHECK-RV64-NEXT: .LBB61_867: # %cond.load1397
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15914,7 +17592,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_361
+; CHECK-RV64-NEXT: slli a2, a1, 32
+; CHECK-RV64-NEXT: bltz a2, .LBB61_868
+; CHECK-RV64-NEXT: j .LBB61_362
; CHECK-RV64-NEXT: .LBB61_868: # %cond.load1401
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15925,7 +17605,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_362
+; CHECK-RV64-NEXT: slli a2, a1, 31
+; CHECK-RV64-NEXT: bltz a2, .LBB61_869
+; CHECK-RV64-NEXT: j .LBB61_363
; CHECK-RV64-NEXT: .LBB61_869: # %cond.load1405
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15936,7 +17618,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_363
+; CHECK-RV64-NEXT: slli a2, a1, 30
+; CHECK-RV64-NEXT: bltz a2, .LBB61_870
+; CHECK-RV64-NEXT: j .LBB61_364
; CHECK-RV64-NEXT: .LBB61_870: # %cond.load1409
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15947,7 +17631,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_364
+; CHECK-RV64-NEXT: slli a2, a1, 29
+; CHECK-RV64-NEXT: bltz a2, .LBB61_871
+; CHECK-RV64-NEXT: j .LBB61_365
; CHECK-RV64-NEXT: .LBB61_871: # %cond.load1413
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15958,7 +17644,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_365
+; CHECK-RV64-NEXT: slli a2, a1, 28
+; CHECK-RV64-NEXT: bltz a2, .LBB61_872
+; CHECK-RV64-NEXT: j .LBB61_366
; CHECK-RV64-NEXT: .LBB61_872: # %cond.load1417
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15969,7 +17657,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_366
+; CHECK-RV64-NEXT: slli a2, a1, 27
+; CHECK-RV64-NEXT: bltz a2, .LBB61_873
+; CHECK-RV64-NEXT: j .LBB61_367
; CHECK-RV64-NEXT: .LBB61_873: # %cond.load1421
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15980,7 +17670,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_367
+; CHECK-RV64-NEXT: slli a2, a1, 26
+; CHECK-RV64-NEXT: bltz a2, .LBB61_874
+; CHECK-RV64-NEXT: j .LBB61_368
; CHECK-RV64-NEXT: .LBB61_874: # %cond.load1425
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -15991,7 +17683,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_368
+; CHECK-RV64-NEXT: slli a2, a1, 25
+; CHECK-RV64-NEXT: bltz a2, .LBB61_875
+; CHECK-RV64-NEXT: j .LBB61_369
; CHECK-RV64-NEXT: .LBB61_875: # %cond.load1429
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16002,7 +17696,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_369
+; CHECK-RV64-NEXT: slli a2, a1, 24
+; CHECK-RV64-NEXT: bltz a2, .LBB61_876
+; CHECK-RV64-NEXT: j .LBB61_370
; CHECK-RV64-NEXT: .LBB61_876: # %cond.load1433
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16013,7 +17709,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_370
+; CHECK-RV64-NEXT: slli a2, a1, 23
+; CHECK-RV64-NEXT: bltz a2, .LBB61_877
+; CHECK-RV64-NEXT: j .LBB61_371
; CHECK-RV64-NEXT: .LBB61_877: # %cond.load1437
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16024,7 +17722,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_371
+; CHECK-RV64-NEXT: slli a2, a1, 22
+; CHECK-RV64-NEXT: bltz a2, .LBB61_878
+; CHECK-RV64-NEXT: j .LBB61_372
; CHECK-RV64-NEXT: .LBB61_878: # %cond.load1441
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16035,7 +17735,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_372
+; CHECK-RV64-NEXT: slli a2, a1, 21
+; CHECK-RV64-NEXT: bltz a2, .LBB61_879
+; CHECK-RV64-NEXT: j .LBB61_373
; CHECK-RV64-NEXT: .LBB61_879: # %cond.load1445
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16046,7 +17748,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_373
+; CHECK-RV64-NEXT: slli a2, a1, 20
+; CHECK-RV64-NEXT: bltz a2, .LBB61_880
+; CHECK-RV64-NEXT: j .LBB61_374
; CHECK-RV64-NEXT: .LBB61_880: # %cond.load1449
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16057,7 +17761,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_374
+; CHECK-RV64-NEXT: slli a2, a1, 19
+; CHECK-RV64-NEXT: bltz a2, .LBB61_881
+; CHECK-RV64-NEXT: j .LBB61_375
; CHECK-RV64-NEXT: .LBB61_881: # %cond.load1453
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16068,7 +17774,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_375
+; CHECK-RV64-NEXT: slli a2, a1, 18
+; CHECK-RV64-NEXT: bltz a2, .LBB61_882
+; CHECK-RV64-NEXT: j .LBB61_376
; CHECK-RV64-NEXT: .LBB61_882: # %cond.load1457
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16079,7 +17787,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_376
+; CHECK-RV64-NEXT: slli a2, a1, 17
+; CHECK-RV64-NEXT: bltz a2, .LBB61_883
+; CHECK-RV64-NEXT: j .LBB61_377
; CHECK-RV64-NEXT: .LBB61_883: # %cond.load1461
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16090,7 +17800,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_377
+; CHECK-RV64-NEXT: slli a2, a1, 16
+; CHECK-RV64-NEXT: bltz a2, .LBB61_884
+; CHECK-RV64-NEXT: j .LBB61_378
; CHECK-RV64-NEXT: .LBB61_884: # %cond.load1465
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16101,7 +17813,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_378
+; CHECK-RV64-NEXT: slli a2, a1, 15
+; CHECK-RV64-NEXT: bltz a2, .LBB61_885
+; CHECK-RV64-NEXT: j .LBB61_379
; CHECK-RV64-NEXT: .LBB61_885: # %cond.load1469
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16112,7 +17826,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_379
+; CHECK-RV64-NEXT: slli a2, a1, 14
+; CHECK-RV64-NEXT: bltz a2, .LBB61_886
+; CHECK-RV64-NEXT: j .LBB61_380
; CHECK-RV64-NEXT: .LBB61_886: # %cond.load1473
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16123,7 +17839,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_380
+; CHECK-RV64-NEXT: slli a2, a1, 13
+; CHECK-RV64-NEXT: bltz a2, .LBB61_887
+; CHECK-RV64-NEXT: j .LBB61_381
; CHECK-RV64-NEXT: .LBB61_887: # %cond.load1477
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16134,7 +17852,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_381
+; CHECK-RV64-NEXT: slli a2, a1, 12
+; CHECK-RV64-NEXT: bltz a2, .LBB61_888
+; CHECK-RV64-NEXT: j .LBB61_382
; CHECK-RV64-NEXT: .LBB61_888: # %cond.load1481
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16145,7 +17865,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_382
+; CHECK-RV64-NEXT: slli a2, a1, 11
+; CHECK-RV64-NEXT: bltz a2, .LBB61_889
+; CHECK-RV64-NEXT: j .LBB61_383
; CHECK-RV64-NEXT: .LBB61_889: # %cond.load1485
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16156,7 +17878,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_383
+; CHECK-RV64-NEXT: slli a2, a1, 10
+; CHECK-RV64-NEXT: bltz a2, .LBB61_890
+; CHECK-RV64-NEXT: j .LBB61_384
; CHECK-RV64-NEXT: .LBB61_890: # %cond.load1489
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16167,7 +17891,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_384
+; CHECK-RV64-NEXT: slli a2, a1, 9
+; CHECK-RV64-NEXT: bltz a2, .LBB61_891
+; CHECK-RV64-NEXT: j .LBB61_385
; CHECK-RV64-NEXT: .LBB61_891: # %cond.load1493
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16178,7 +17904,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_385
+; CHECK-RV64-NEXT: slli a2, a1, 8
+; CHECK-RV64-NEXT: bltz a2, .LBB61_892
+; CHECK-RV64-NEXT: j .LBB61_386
; CHECK-RV64-NEXT: .LBB61_892: # %cond.load1497
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16189,7 +17917,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_386
+; CHECK-RV64-NEXT: slli a2, a1, 7
+; CHECK-RV64-NEXT: bltz a2, .LBB61_893
+; CHECK-RV64-NEXT: j .LBB61_387
; CHECK-RV64-NEXT: .LBB61_893: # %cond.load1501
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16200,7 +17930,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_387
+; CHECK-RV64-NEXT: slli a2, a1, 6
+; CHECK-RV64-NEXT: bltz a2, .LBB61_894
+; CHECK-RV64-NEXT: j .LBB61_388
; CHECK-RV64-NEXT: .LBB61_894: # %cond.load1505
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16211,7 +17943,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_388
+; CHECK-RV64-NEXT: slli a2, a1, 5
+; CHECK-RV64-NEXT: bltz a2, .LBB61_895
+; CHECK-RV64-NEXT: j .LBB61_389
; CHECK-RV64-NEXT: .LBB61_895: # %cond.load1509
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16222,7 +17956,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_389
+; CHECK-RV64-NEXT: slli a2, a1, 4
+; CHECK-RV64-NEXT: bltz a2, .LBB61_896
+; CHECK-RV64-NEXT: j .LBB61_390
; CHECK-RV64-NEXT: .LBB61_896: # %cond.load1513
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16233,7 +17969,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_390
+; CHECK-RV64-NEXT: slli a2, a1, 3
+; CHECK-RV64-NEXT: bltz a2, .LBB61_897
+; CHECK-RV64-NEXT: j .LBB61_391
; CHECK-RV64-NEXT: .LBB61_897: # %cond.load1517
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16244,7 +17982,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_391
+; CHECK-RV64-NEXT: slli a2, a1, 2
+; CHECK-RV64-NEXT: bgez a2, .LBB61_1030
+; CHECK-RV64-NEXT: j .LBB61_392
+; CHECK-RV64-NEXT: .LBB61_1030: # %cond.load1517
+; CHECK-RV64-NEXT: j .LBB61_393
; CHECK-RV64-NEXT: .LBB61_898: # %cond.load1529
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: vmv.s.x v16, a1
@@ -16253,7 +17995,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_396
+; CHECK-RV64-NEXT: andi a1, a2, 1
+; CHECK-RV64-NEXT: bnez a1, .LBB61_899
+; CHECK-RV64-NEXT: j .LBB61_397
; CHECK-RV64-NEXT: .LBB61_899: # %cond.load1533
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16264,7 +18008,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_397
+; CHECK-RV64-NEXT: andi a1, a2, 2
+; CHECK-RV64-NEXT: bnez a1, .LBB61_900
+; CHECK-RV64-NEXT: j .LBB61_398
; CHECK-RV64-NEXT: .LBB61_900: # %cond.load1537
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16275,7 +18021,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_398
+; CHECK-RV64-NEXT: andi a1, a2, 4
+; CHECK-RV64-NEXT: bnez a1, .LBB61_901
+; CHECK-RV64-NEXT: j .LBB61_399
; CHECK-RV64-NEXT: .LBB61_901: # %cond.load1541
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16286,7 +18034,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_399
+; CHECK-RV64-NEXT: andi a1, a2, 8
+; CHECK-RV64-NEXT: bnez a1, .LBB61_902
+; CHECK-RV64-NEXT: j .LBB61_400
; CHECK-RV64-NEXT: .LBB61_902: # %cond.load1545
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16297,7 +18047,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_400
+; CHECK-RV64-NEXT: andi a1, a2, 16
+; CHECK-RV64-NEXT: bnez a1, .LBB61_903
+; CHECK-RV64-NEXT: j .LBB61_401
; CHECK-RV64-NEXT: .LBB61_903: # %cond.load1549
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16308,7 +18060,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_401
+; CHECK-RV64-NEXT: andi a1, a2, 32
+; CHECK-RV64-NEXT: bnez a1, .LBB61_904
+; CHECK-RV64-NEXT: j .LBB61_402
; CHECK-RV64-NEXT: .LBB61_904: # %cond.load1553
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16319,7 +18073,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_402
+; CHECK-RV64-NEXT: andi a1, a2, 64
+; CHECK-RV64-NEXT: bnez a1, .LBB61_905
+; CHECK-RV64-NEXT: j .LBB61_403
; CHECK-RV64-NEXT: .LBB61_905: # %cond.load1557
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16330,7 +18086,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_403
+; CHECK-RV64-NEXT: andi a1, a2, 128
+; CHECK-RV64-NEXT: bnez a1, .LBB61_906
+; CHECK-RV64-NEXT: j .LBB61_404
; CHECK-RV64-NEXT: .LBB61_906: # %cond.load1561
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16341,7 +18099,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_404
+; CHECK-RV64-NEXT: andi a1, a2, 256
+; CHECK-RV64-NEXT: bnez a1, .LBB61_907
+; CHECK-RV64-NEXT: j .LBB61_405
; CHECK-RV64-NEXT: .LBB61_907: # %cond.load1565
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16352,7 +18112,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_405
+; CHECK-RV64-NEXT: andi a1, a2, 512
+; CHECK-RV64-NEXT: bnez a1, .LBB61_908
+; CHECK-RV64-NEXT: j .LBB61_406
; CHECK-RV64-NEXT: .LBB61_908: # %cond.load1569
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16363,7 +18125,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_406
+; CHECK-RV64-NEXT: andi a1, a2, 1024
+; CHECK-RV64-NEXT: bnez a1, .LBB61_909
+; CHECK-RV64-NEXT: j .LBB61_407
; CHECK-RV64-NEXT: .LBB61_909: # %cond.load1573
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16374,7 +18138,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_407
+; CHECK-RV64-NEXT: slli a1, a2, 52
+; CHECK-RV64-NEXT: bltz a1, .LBB61_910
+; CHECK-RV64-NEXT: j .LBB61_408
; CHECK-RV64-NEXT: .LBB61_910: # %cond.load1577
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16385,7 +18151,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_408
+; CHECK-RV64-NEXT: slli a1, a2, 51
+; CHECK-RV64-NEXT: bltz a1, .LBB61_911
+; CHECK-RV64-NEXT: j .LBB61_409
; CHECK-RV64-NEXT: .LBB61_911: # %cond.load1581
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16396,7 +18164,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_409
+; CHECK-RV64-NEXT: slli a1, a2, 50
+; CHECK-RV64-NEXT: bltz a1, .LBB61_912
+; CHECK-RV64-NEXT: j .LBB61_410
; CHECK-RV64-NEXT: .LBB61_912: # %cond.load1585
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16407,7 +18177,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_410
+; CHECK-RV64-NEXT: slli a1, a2, 49
+; CHECK-RV64-NEXT: bltz a1, .LBB61_913
+; CHECK-RV64-NEXT: j .LBB61_411
; CHECK-RV64-NEXT: .LBB61_913: # %cond.load1589
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16418,7 +18190,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_411
+; CHECK-RV64-NEXT: slli a1, a2, 48
+; CHECK-RV64-NEXT: bltz a1, .LBB61_914
+; CHECK-RV64-NEXT: j .LBB61_412
; CHECK-RV64-NEXT: .LBB61_914: # %cond.load1593
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16429,7 +18203,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_412
+; CHECK-RV64-NEXT: slli a1, a2, 47
+; CHECK-RV64-NEXT: bltz a1, .LBB61_915
+; CHECK-RV64-NEXT: j .LBB61_413
; CHECK-RV64-NEXT: .LBB61_915: # %cond.load1597
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16440,7 +18216,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_413
+; CHECK-RV64-NEXT: slli a1, a2, 46
+; CHECK-RV64-NEXT: bltz a1, .LBB61_916
+; CHECK-RV64-NEXT: j .LBB61_414
; CHECK-RV64-NEXT: .LBB61_916: # %cond.load1601
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16451,7 +18229,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_414
+; CHECK-RV64-NEXT: slli a1, a2, 45
+; CHECK-RV64-NEXT: bltz a1, .LBB61_917
+; CHECK-RV64-NEXT: j .LBB61_415
; CHECK-RV64-NEXT: .LBB61_917: # %cond.load1605
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16462,7 +18242,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_415
+; CHECK-RV64-NEXT: slli a1, a2, 44
+; CHECK-RV64-NEXT: bltz a1, .LBB61_918
+; CHECK-RV64-NEXT: j .LBB61_416
; CHECK-RV64-NEXT: .LBB61_918: # %cond.load1609
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16473,7 +18255,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_416
+; CHECK-RV64-NEXT: slli a1, a2, 43
+; CHECK-RV64-NEXT: bltz a1, .LBB61_919
+; CHECK-RV64-NEXT: j .LBB61_417
; CHECK-RV64-NEXT: .LBB61_919: # %cond.load1613
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16484,7 +18268,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_417
+; CHECK-RV64-NEXT: slli a1, a2, 42
+; CHECK-RV64-NEXT: bltz a1, .LBB61_920
+; CHECK-RV64-NEXT: j .LBB61_418
; CHECK-RV64-NEXT: .LBB61_920: # %cond.load1617
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16495,7 +18281,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_418
+; CHECK-RV64-NEXT: slli a1, a2, 41
+; CHECK-RV64-NEXT: bltz a1, .LBB61_921
+; CHECK-RV64-NEXT: j .LBB61_419
; CHECK-RV64-NEXT: .LBB61_921: # %cond.load1621
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16506,7 +18294,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_419
+; CHECK-RV64-NEXT: slli a1, a2, 40
+; CHECK-RV64-NEXT: bltz a1, .LBB61_922
+; CHECK-RV64-NEXT: j .LBB61_420
; CHECK-RV64-NEXT: .LBB61_922: # %cond.load1625
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16517,7 +18307,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_420
+; CHECK-RV64-NEXT: slli a1, a2, 39
+; CHECK-RV64-NEXT: bltz a1, .LBB61_923
+; CHECK-RV64-NEXT: j .LBB61_421
; CHECK-RV64-NEXT: .LBB61_923: # %cond.load1629
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16528,7 +18320,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_421
+; CHECK-RV64-NEXT: slli a1, a2, 38
+; CHECK-RV64-NEXT: bltz a1, .LBB61_924
+; CHECK-RV64-NEXT: j .LBB61_422
; CHECK-RV64-NEXT: .LBB61_924: # %cond.load1633
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16539,7 +18333,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_422
+; CHECK-RV64-NEXT: slli a1, a2, 37
+; CHECK-RV64-NEXT: bltz a1, .LBB61_925
+; CHECK-RV64-NEXT: j .LBB61_423
; CHECK-RV64-NEXT: .LBB61_925: # %cond.load1637
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16550,7 +18346,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_423
+; CHECK-RV64-NEXT: slli a1, a2, 36
+; CHECK-RV64-NEXT: bltz a1, .LBB61_926
+; CHECK-RV64-NEXT: j .LBB61_424
; CHECK-RV64-NEXT: .LBB61_926: # %cond.load1641
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16561,7 +18359,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_424
+; CHECK-RV64-NEXT: slli a1, a2, 35
+; CHECK-RV64-NEXT: bltz a1, .LBB61_927
+; CHECK-RV64-NEXT: j .LBB61_425
; CHECK-RV64-NEXT: .LBB61_927: # %cond.load1645
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16572,7 +18372,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_425
+; CHECK-RV64-NEXT: slli a1, a2, 34
+; CHECK-RV64-NEXT: bltz a1, .LBB61_928
+; CHECK-RV64-NEXT: j .LBB61_426
; CHECK-RV64-NEXT: .LBB61_928: # %cond.load1649
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16583,7 +18385,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_426
+; CHECK-RV64-NEXT: slli a1, a2, 33
+; CHECK-RV64-NEXT: bltz a1, .LBB61_929
+; CHECK-RV64-NEXT: j .LBB61_427
; CHECK-RV64-NEXT: .LBB61_929: # %cond.load1653
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16594,7 +18398,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_427
+; CHECK-RV64-NEXT: slli a1, a2, 32
+; CHECK-RV64-NEXT: bltz a1, .LBB61_930
+; CHECK-RV64-NEXT: j .LBB61_428
; CHECK-RV64-NEXT: .LBB61_930: # %cond.load1657
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16605,7 +18411,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_428
+; CHECK-RV64-NEXT: slli a1, a2, 31
+; CHECK-RV64-NEXT: bltz a1, .LBB61_931
+; CHECK-RV64-NEXT: j .LBB61_429
; CHECK-RV64-NEXT: .LBB61_931: # %cond.load1661
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16616,7 +18424,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_429
+; CHECK-RV64-NEXT: slli a1, a2, 30
+; CHECK-RV64-NEXT: bltz a1, .LBB61_932
+; CHECK-RV64-NEXT: j .LBB61_430
; CHECK-RV64-NEXT: .LBB61_932: # %cond.load1665
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16627,7 +18437,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_430
+; CHECK-RV64-NEXT: slli a1, a2, 29
+; CHECK-RV64-NEXT: bltz a1, .LBB61_933
+; CHECK-RV64-NEXT: j .LBB61_431
; CHECK-RV64-NEXT: .LBB61_933: # %cond.load1669
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16638,7 +18450,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_431
+; CHECK-RV64-NEXT: slli a1, a2, 28
+; CHECK-RV64-NEXT: bltz a1, .LBB61_934
+; CHECK-RV64-NEXT: j .LBB61_432
; CHECK-RV64-NEXT: .LBB61_934: # %cond.load1673
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16649,7 +18463,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_432
+; CHECK-RV64-NEXT: slli a1, a2, 27
+; CHECK-RV64-NEXT: bltz a1, .LBB61_935
+; CHECK-RV64-NEXT: j .LBB61_433
; CHECK-RV64-NEXT: .LBB61_935: # %cond.load1677
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16660,7 +18476,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_433
+; CHECK-RV64-NEXT: slli a1, a2, 26
+; CHECK-RV64-NEXT: bltz a1, .LBB61_936
+; CHECK-RV64-NEXT: j .LBB61_434
; CHECK-RV64-NEXT: .LBB61_936: # %cond.load1681
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16671,7 +18489,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_434
+; CHECK-RV64-NEXT: slli a1, a2, 25
+; CHECK-RV64-NEXT: bltz a1, .LBB61_937
+; CHECK-RV64-NEXT: j .LBB61_435
; CHECK-RV64-NEXT: .LBB61_937: # %cond.load1685
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16682,7 +18502,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_435
+; CHECK-RV64-NEXT: slli a1, a2, 24
+; CHECK-RV64-NEXT: bltz a1, .LBB61_938
+; CHECK-RV64-NEXT: j .LBB61_436
; CHECK-RV64-NEXT: .LBB61_938: # %cond.load1689
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16693,7 +18515,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_436
+; CHECK-RV64-NEXT: slli a1, a2, 23
+; CHECK-RV64-NEXT: bltz a1, .LBB61_939
+; CHECK-RV64-NEXT: j .LBB61_437
; CHECK-RV64-NEXT: .LBB61_939: # %cond.load1693
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16704,7 +18528,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_437
+; CHECK-RV64-NEXT: slli a1, a2, 22
+; CHECK-RV64-NEXT: bltz a1, .LBB61_940
+; CHECK-RV64-NEXT: j .LBB61_438
; CHECK-RV64-NEXT: .LBB61_940: # %cond.load1697
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16715,7 +18541,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_438
+; CHECK-RV64-NEXT: slli a1, a2, 21
+; CHECK-RV64-NEXT: bltz a1, .LBB61_941
+; CHECK-RV64-NEXT: j .LBB61_439
; CHECK-RV64-NEXT: .LBB61_941: # %cond.load1701
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16726,7 +18554,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_439
+; CHECK-RV64-NEXT: slli a1, a2, 20
+; CHECK-RV64-NEXT: bltz a1, .LBB61_942
+; CHECK-RV64-NEXT: j .LBB61_440
; CHECK-RV64-NEXT: .LBB61_942: # %cond.load1705
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16737,7 +18567,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_440
+; CHECK-RV64-NEXT: slli a1, a2, 19
+; CHECK-RV64-NEXT: bltz a1, .LBB61_943
+; CHECK-RV64-NEXT: j .LBB61_441
; CHECK-RV64-NEXT: .LBB61_943: # %cond.load1709
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16748,7 +18580,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_441
+; CHECK-RV64-NEXT: slli a1, a2, 18
+; CHECK-RV64-NEXT: bltz a1, .LBB61_944
+; CHECK-RV64-NEXT: j .LBB61_442
; CHECK-RV64-NEXT: .LBB61_944: # %cond.load1713
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16759,7 +18593,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_442
+; CHECK-RV64-NEXT: slli a1, a2, 17
+; CHECK-RV64-NEXT: bltz a1, .LBB61_945
+; CHECK-RV64-NEXT: j .LBB61_443
; CHECK-RV64-NEXT: .LBB61_945: # %cond.load1717
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16770,7 +18606,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_443
+; CHECK-RV64-NEXT: slli a1, a2, 16
+; CHECK-RV64-NEXT: bltz a1, .LBB61_946
+; CHECK-RV64-NEXT: j .LBB61_444
; CHECK-RV64-NEXT: .LBB61_946: # %cond.load1721
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16781,7 +18619,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_444
+; CHECK-RV64-NEXT: slli a1, a2, 15
+; CHECK-RV64-NEXT: bltz a1, .LBB61_947
+; CHECK-RV64-NEXT: j .LBB61_445
; CHECK-RV64-NEXT: .LBB61_947: # %cond.load1725
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16792,7 +18632,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_445
+; CHECK-RV64-NEXT: slli a1, a2, 14
+; CHECK-RV64-NEXT: bltz a1, .LBB61_948
+; CHECK-RV64-NEXT: j .LBB61_446
; CHECK-RV64-NEXT: .LBB61_948: # %cond.load1729
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16803,7 +18645,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_446
+; CHECK-RV64-NEXT: slli a1, a2, 13
+; CHECK-RV64-NEXT: bltz a1, .LBB61_949
+; CHECK-RV64-NEXT: j .LBB61_447
; CHECK-RV64-NEXT: .LBB61_949: # %cond.load1733
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16814,7 +18658,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_447
+; CHECK-RV64-NEXT: slli a1, a2, 12
+; CHECK-RV64-NEXT: bltz a1, .LBB61_950
+; CHECK-RV64-NEXT: j .LBB61_448
; CHECK-RV64-NEXT: .LBB61_950: # %cond.load1737
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16825,7 +18671,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_448
+; CHECK-RV64-NEXT: slli a1, a2, 11
+; CHECK-RV64-NEXT: bltz a1, .LBB61_951
+; CHECK-RV64-NEXT: j .LBB61_449
; CHECK-RV64-NEXT: .LBB61_951: # %cond.load1741
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16836,7 +18684,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_449
+; CHECK-RV64-NEXT: slli a1, a2, 10
+; CHECK-RV64-NEXT: bltz a1, .LBB61_952
+; CHECK-RV64-NEXT: j .LBB61_450
; CHECK-RV64-NEXT: .LBB61_952: # %cond.load1745
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16847,7 +18697,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_450
+; CHECK-RV64-NEXT: slli a1, a2, 9
+; CHECK-RV64-NEXT: bltz a1, .LBB61_953
+; CHECK-RV64-NEXT: j .LBB61_451
; CHECK-RV64-NEXT: .LBB61_953: # %cond.load1749
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16858,7 +18710,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_451
+; CHECK-RV64-NEXT: slli a1, a2, 8
+; CHECK-RV64-NEXT: bltz a1, .LBB61_954
+; CHECK-RV64-NEXT: j .LBB61_452
; CHECK-RV64-NEXT: .LBB61_954: # %cond.load1753
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16869,7 +18723,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_452
+; CHECK-RV64-NEXT: slli a1, a2, 7
+; CHECK-RV64-NEXT: bltz a1, .LBB61_955
+; CHECK-RV64-NEXT: j .LBB61_453
; CHECK-RV64-NEXT: .LBB61_955: # %cond.load1757
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16880,7 +18736,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_453
+; CHECK-RV64-NEXT: slli a1, a2, 6
+; CHECK-RV64-NEXT: bltz a1, .LBB61_956
+; CHECK-RV64-NEXT: j .LBB61_454
; CHECK-RV64-NEXT: .LBB61_956: # %cond.load1761
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16891,7 +18749,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_454
+; CHECK-RV64-NEXT: slli a1, a2, 5
+; CHECK-RV64-NEXT: bltz a1, .LBB61_957
+; CHECK-RV64-NEXT: j .LBB61_455
; CHECK-RV64-NEXT: .LBB61_957: # %cond.load1765
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16902,7 +18762,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_455
+; CHECK-RV64-NEXT: slli a1, a2, 4
+; CHECK-RV64-NEXT: bltz a1, .LBB61_958
+; CHECK-RV64-NEXT: j .LBB61_456
; CHECK-RV64-NEXT: .LBB61_958: # %cond.load1769
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16913,7 +18775,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_456
+; CHECK-RV64-NEXT: slli a1, a2, 3
+; CHECK-RV64-NEXT: bltz a1, .LBB61_959
+; CHECK-RV64-NEXT: j .LBB61_457
; CHECK-RV64-NEXT: .LBB61_959: # %cond.load1773
; CHECK-RV64-NEXT: lbu a1, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16924,7 +18788,11 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_457
+; CHECK-RV64-NEXT: slli a1, a2, 2
+; CHECK-RV64-NEXT: bgez a1, .LBB61_1031
+; CHECK-RV64-NEXT: j .LBB61_458
+; CHECK-RV64-NEXT: .LBB61_1031: # %cond.load1773
+; CHECK-RV64-NEXT: j .LBB61_459
; CHECK-RV64-NEXT: .LBB61_960: # %cond.load1785
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: vmv.s.x v16, a2
@@ -16933,7 +18801,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_462
+; CHECK-RV64-NEXT: andi a2, a1, 1
+; CHECK-RV64-NEXT: bnez a2, .LBB61_961
+; CHECK-RV64-NEXT: j .LBB61_463
; CHECK-RV64-NEXT: .LBB61_961: # %cond.load1789
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16944,7 +18814,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_463
+; CHECK-RV64-NEXT: andi a2, a1, 2
+; CHECK-RV64-NEXT: bnez a2, .LBB61_962
+; CHECK-RV64-NEXT: j .LBB61_464
; CHECK-RV64-NEXT: .LBB61_962: # %cond.load1793
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16955,7 +18827,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_464
+; CHECK-RV64-NEXT: andi a2, a1, 4
+; CHECK-RV64-NEXT: bnez a2, .LBB61_963
+; CHECK-RV64-NEXT: j .LBB61_465
; CHECK-RV64-NEXT: .LBB61_963: # %cond.load1797
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16966,7 +18840,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_465
+; CHECK-RV64-NEXT: andi a2, a1, 8
+; CHECK-RV64-NEXT: bnez a2, .LBB61_964
+; CHECK-RV64-NEXT: j .LBB61_466
; CHECK-RV64-NEXT: .LBB61_964: # %cond.load1801
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16977,7 +18853,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_466
+; CHECK-RV64-NEXT: andi a2, a1, 16
+; CHECK-RV64-NEXT: bnez a2, .LBB61_965
+; CHECK-RV64-NEXT: j .LBB61_467
; CHECK-RV64-NEXT: .LBB61_965: # %cond.load1805
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16988,7 +18866,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_467
+; CHECK-RV64-NEXT: andi a2, a1, 32
+; CHECK-RV64-NEXT: bnez a2, .LBB61_966
+; CHECK-RV64-NEXT: j .LBB61_468
; CHECK-RV64-NEXT: .LBB61_966: # %cond.load1809
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -16999,7 +18879,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_468
+; CHECK-RV64-NEXT: andi a2, a1, 64
+; CHECK-RV64-NEXT: bnez a2, .LBB61_967
+; CHECK-RV64-NEXT: j .LBB61_469
; CHECK-RV64-NEXT: .LBB61_967: # %cond.load1813
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17010,7 +18892,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_469
+; CHECK-RV64-NEXT: andi a2, a1, 128
+; CHECK-RV64-NEXT: bnez a2, .LBB61_968
+; CHECK-RV64-NEXT: j .LBB61_470
; CHECK-RV64-NEXT: .LBB61_968: # %cond.load1817
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17021,7 +18905,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_470
+; CHECK-RV64-NEXT: andi a2, a1, 256
+; CHECK-RV64-NEXT: bnez a2, .LBB61_969
+; CHECK-RV64-NEXT: j .LBB61_471
; CHECK-RV64-NEXT: .LBB61_969: # %cond.load1821
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17032,7 +18918,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_471
+; CHECK-RV64-NEXT: andi a2, a1, 512
+; CHECK-RV64-NEXT: bnez a2, .LBB61_970
+; CHECK-RV64-NEXT: j .LBB61_472
; CHECK-RV64-NEXT: .LBB61_970: # %cond.load1825
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17043,7 +18931,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_472
+; CHECK-RV64-NEXT: andi a2, a1, 1024
+; CHECK-RV64-NEXT: bnez a2, .LBB61_971
+; CHECK-RV64-NEXT: j .LBB61_473
; CHECK-RV64-NEXT: .LBB61_971: # %cond.load1829
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17054,7 +18944,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_473
+; CHECK-RV64-NEXT: slli a2, a1, 52
+; CHECK-RV64-NEXT: bltz a2, .LBB61_972
+; CHECK-RV64-NEXT: j .LBB61_474
; CHECK-RV64-NEXT: .LBB61_972: # %cond.load1833
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17065,7 +18957,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_474
+; CHECK-RV64-NEXT: slli a2, a1, 51
+; CHECK-RV64-NEXT: bltz a2, .LBB61_973
+; CHECK-RV64-NEXT: j .LBB61_475
; CHECK-RV64-NEXT: .LBB61_973: # %cond.load1837
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17076,7 +18970,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_475
+; CHECK-RV64-NEXT: slli a2, a1, 50
+; CHECK-RV64-NEXT: bltz a2, .LBB61_974
+; CHECK-RV64-NEXT: j .LBB61_476
; CHECK-RV64-NEXT: .LBB61_974: # %cond.load1841
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17087,7 +18983,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_476
+; CHECK-RV64-NEXT: slli a2, a1, 49
+; CHECK-RV64-NEXT: bltz a2, .LBB61_975
+; CHECK-RV64-NEXT: j .LBB61_477
; CHECK-RV64-NEXT: .LBB61_975: # %cond.load1845
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17098,7 +18996,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_477
+; CHECK-RV64-NEXT: slli a2, a1, 48
+; CHECK-RV64-NEXT: bltz a2, .LBB61_976
+; CHECK-RV64-NEXT: j .LBB61_478
; CHECK-RV64-NEXT: .LBB61_976: # %cond.load1849
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17109,7 +19009,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_478
+; CHECK-RV64-NEXT: slli a2, a1, 47
+; CHECK-RV64-NEXT: bltz a2, .LBB61_977
+; CHECK-RV64-NEXT: j .LBB61_479
; CHECK-RV64-NEXT: .LBB61_977: # %cond.load1853
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17120,7 +19022,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_479
+; CHECK-RV64-NEXT: slli a2, a1, 46
+; CHECK-RV64-NEXT: bltz a2, .LBB61_978
+; CHECK-RV64-NEXT: j .LBB61_480
; CHECK-RV64-NEXT: .LBB61_978: # %cond.load1857
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17131,7 +19035,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_480
+; CHECK-RV64-NEXT: slli a2, a1, 45
+; CHECK-RV64-NEXT: bltz a2, .LBB61_979
+; CHECK-RV64-NEXT: j .LBB61_481
; CHECK-RV64-NEXT: .LBB61_979: # %cond.load1861
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17142,7 +19048,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_481
+; CHECK-RV64-NEXT: slli a2, a1, 44
+; CHECK-RV64-NEXT: bltz a2, .LBB61_980
+; CHECK-RV64-NEXT: j .LBB61_482
; CHECK-RV64-NEXT: .LBB61_980: # %cond.load1865
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17153,7 +19061,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_482
+; CHECK-RV64-NEXT: slli a2, a1, 43
+; CHECK-RV64-NEXT: bltz a2, .LBB61_981
+; CHECK-RV64-NEXT: j .LBB61_483
; CHECK-RV64-NEXT: .LBB61_981: # %cond.load1869
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17164,7 +19074,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_483
+; CHECK-RV64-NEXT: slli a2, a1, 42
+; CHECK-RV64-NEXT: bltz a2, .LBB61_982
+; CHECK-RV64-NEXT: j .LBB61_484
; CHECK-RV64-NEXT: .LBB61_982: # %cond.load1873
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17175,7 +19087,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_484
+; CHECK-RV64-NEXT: slli a2, a1, 41
+; CHECK-RV64-NEXT: bltz a2, .LBB61_983
+; CHECK-RV64-NEXT: j .LBB61_485
; CHECK-RV64-NEXT: .LBB61_983: # %cond.load1877
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17186,7 +19100,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_485
+; CHECK-RV64-NEXT: slli a2, a1, 40
+; CHECK-RV64-NEXT: bltz a2, .LBB61_984
+; CHECK-RV64-NEXT: j .LBB61_486
; CHECK-RV64-NEXT: .LBB61_984: # %cond.load1881
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17197,7 +19113,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_486
+; CHECK-RV64-NEXT: slli a2, a1, 39
+; CHECK-RV64-NEXT: bltz a2, .LBB61_985
+; CHECK-RV64-NEXT: j .LBB61_487
; CHECK-RV64-NEXT: .LBB61_985: # %cond.load1885
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17208,7 +19126,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_487
+; CHECK-RV64-NEXT: slli a2, a1, 38
+; CHECK-RV64-NEXT: bltz a2, .LBB61_986
+; CHECK-RV64-NEXT: j .LBB61_488
; CHECK-RV64-NEXT: .LBB61_986: # %cond.load1889
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17219,7 +19139,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_488
+; CHECK-RV64-NEXT: slli a2, a1, 37
+; CHECK-RV64-NEXT: bltz a2, .LBB61_987
+; CHECK-RV64-NEXT: j .LBB61_489
; CHECK-RV64-NEXT: .LBB61_987: # %cond.load1893
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17230,7 +19152,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_489
+; CHECK-RV64-NEXT: slli a2, a1, 36
+; CHECK-RV64-NEXT: bltz a2, .LBB61_988
+; CHECK-RV64-NEXT: j .LBB61_490
; CHECK-RV64-NEXT: .LBB61_988: # %cond.load1897
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17241,7 +19165,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_490
+; CHECK-RV64-NEXT: slli a2, a1, 35
+; CHECK-RV64-NEXT: bltz a2, .LBB61_989
+; CHECK-RV64-NEXT: j .LBB61_491
; CHECK-RV64-NEXT: .LBB61_989: # %cond.load1901
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17252,7 +19178,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_491
+; CHECK-RV64-NEXT: slli a2, a1, 34
+; CHECK-RV64-NEXT: bltz a2, .LBB61_990
+; CHECK-RV64-NEXT: j .LBB61_492
; CHECK-RV64-NEXT: .LBB61_990: # %cond.load1905
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17263,7 +19191,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_492
+; CHECK-RV64-NEXT: slli a2, a1, 33
+; CHECK-RV64-NEXT: bltz a2, .LBB61_991
+; CHECK-RV64-NEXT: j .LBB61_493
; CHECK-RV64-NEXT: .LBB61_991: # %cond.load1909
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17274,7 +19204,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_493
+; CHECK-RV64-NEXT: slli a2, a1, 32
+; CHECK-RV64-NEXT: bltz a2, .LBB61_992
+; CHECK-RV64-NEXT: j .LBB61_494
; CHECK-RV64-NEXT: .LBB61_992: # %cond.load1913
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17285,7 +19217,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_494
+; CHECK-RV64-NEXT: slli a2, a1, 31
+; CHECK-RV64-NEXT: bltz a2, .LBB61_993
+; CHECK-RV64-NEXT: j .LBB61_495
; CHECK-RV64-NEXT: .LBB61_993: # %cond.load1917
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17296,7 +19230,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_495
+; CHECK-RV64-NEXT: slli a2, a1, 30
+; CHECK-RV64-NEXT: bltz a2, .LBB61_994
+; CHECK-RV64-NEXT: j .LBB61_496
; CHECK-RV64-NEXT: .LBB61_994: # %cond.load1921
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17307,7 +19243,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_496
+; CHECK-RV64-NEXT: slli a2, a1, 29
+; CHECK-RV64-NEXT: bltz a2, .LBB61_995
+; CHECK-RV64-NEXT: j .LBB61_497
; CHECK-RV64-NEXT: .LBB61_995: # %cond.load1925
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17318,7 +19256,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_497
+; CHECK-RV64-NEXT: slli a2, a1, 28
+; CHECK-RV64-NEXT: bltz a2, .LBB61_996
+; CHECK-RV64-NEXT: j .LBB61_498
; CHECK-RV64-NEXT: .LBB61_996: # %cond.load1929
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17329,7 +19269,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_498
+; CHECK-RV64-NEXT: slli a2, a1, 27
+; CHECK-RV64-NEXT: bltz a2, .LBB61_997
+; CHECK-RV64-NEXT: j .LBB61_499
; CHECK-RV64-NEXT: .LBB61_997: # %cond.load1933
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17340,7 +19282,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_499
+; CHECK-RV64-NEXT: slli a2, a1, 26
+; CHECK-RV64-NEXT: bltz a2, .LBB61_998
+; CHECK-RV64-NEXT: j .LBB61_500
; CHECK-RV64-NEXT: .LBB61_998: # %cond.load1937
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17351,7 +19295,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_500
+; CHECK-RV64-NEXT: slli a2, a1, 25
+; CHECK-RV64-NEXT: bltz a2, .LBB61_999
+; CHECK-RV64-NEXT: j .LBB61_501
; CHECK-RV64-NEXT: .LBB61_999: # %cond.load1941
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17362,7 +19308,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_501
+; CHECK-RV64-NEXT: slli a2, a1, 24
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1000
+; CHECK-RV64-NEXT: j .LBB61_502
; CHECK-RV64-NEXT: .LBB61_1000: # %cond.load1945
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17373,7 +19321,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_502
+; CHECK-RV64-NEXT: slli a2, a1, 23
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1001
+; CHECK-RV64-NEXT: j .LBB61_503
; CHECK-RV64-NEXT: .LBB61_1001: # %cond.load1949
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17384,7 +19334,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_503
+; CHECK-RV64-NEXT: slli a2, a1, 22
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1002
+; CHECK-RV64-NEXT: j .LBB61_504
; CHECK-RV64-NEXT: .LBB61_1002: # %cond.load1953
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17395,7 +19347,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_504
+; CHECK-RV64-NEXT: slli a2, a1, 21
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1003
+; CHECK-RV64-NEXT: j .LBB61_505
; CHECK-RV64-NEXT: .LBB61_1003: # %cond.load1957
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17406,7 +19360,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_505
+; CHECK-RV64-NEXT: slli a2, a1, 20
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1004
+; CHECK-RV64-NEXT: j .LBB61_506
; CHECK-RV64-NEXT: .LBB61_1004: # %cond.load1961
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17417,7 +19373,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_506
+; CHECK-RV64-NEXT: slli a2, a1, 19
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1005
+; CHECK-RV64-NEXT: j .LBB61_507
; CHECK-RV64-NEXT: .LBB61_1005: # %cond.load1965
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17428,7 +19386,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_507
+; CHECK-RV64-NEXT: slli a2, a1, 18
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1006
+; CHECK-RV64-NEXT: j .LBB61_508
; CHECK-RV64-NEXT: .LBB61_1006: # %cond.load1969
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17439,7 +19399,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_508
+; CHECK-RV64-NEXT: slli a2, a1, 17
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1007
+; CHECK-RV64-NEXT: j .LBB61_509
; CHECK-RV64-NEXT: .LBB61_1007: # %cond.load1973
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17450,7 +19412,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_509
+; CHECK-RV64-NEXT: slli a2, a1, 16
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1008
+; CHECK-RV64-NEXT: j .LBB61_510
; CHECK-RV64-NEXT: .LBB61_1008: # %cond.load1977
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17461,7 +19425,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_510
+; CHECK-RV64-NEXT: slli a2, a1, 15
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1009
+; CHECK-RV64-NEXT: j .LBB61_511
; CHECK-RV64-NEXT: .LBB61_1009: # %cond.load1981
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17472,7 +19438,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_511
+; CHECK-RV64-NEXT: slli a2, a1, 14
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1010
+; CHECK-RV64-NEXT: j .LBB61_512
; CHECK-RV64-NEXT: .LBB61_1010: # %cond.load1985
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17483,7 +19451,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_512
+; CHECK-RV64-NEXT: slli a2, a1, 13
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1011
+; CHECK-RV64-NEXT: j .LBB61_513
; CHECK-RV64-NEXT: .LBB61_1011: # %cond.load1989
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17494,7 +19464,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_513
+; CHECK-RV64-NEXT: slli a2, a1, 12
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1012
+; CHECK-RV64-NEXT: j .LBB61_514
; CHECK-RV64-NEXT: .LBB61_1012: # %cond.load1993
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17505,7 +19477,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_514
+; CHECK-RV64-NEXT: slli a2, a1, 11
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1013
+; CHECK-RV64-NEXT: j .LBB61_515
; CHECK-RV64-NEXT: .LBB61_1013: # %cond.load1997
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17516,7 +19490,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_515
+; CHECK-RV64-NEXT: slli a2, a1, 10
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1014
+; CHECK-RV64-NEXT: j .LBB61_516
; CHECK-RV64-NEXT: .LBB61_1014: # %cond.load2001
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17527,7 +19503,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_516
+; CHECK-RV64-NEXT: slli a2, a1, 9
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1015
+; CHECK-RV64-NEXT: j .LBB61_517
; CHECK-RV64-NEXT: .LBB61_1015: # %cond.load2005
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17538,7 +19516,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_517
+; CHECK-RV64-NEXT: slli a2, a1, 8
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1016
+; CHECK-RV64-NEXT: j .LBB61_518
; CHECK-RV64-NEXT: .LBB61_1016: # %cond.load2009
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17549,7 +19529,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_518
+; CHECK-RV64-NEXT: slli a2, a1, 7
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1017
+; CHECK-RV64-NEXT: j .LBB61_519
; CHECK-RV64-NEXT: .LBB61_1017: # %cond.load2013
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17560,7 +19542,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_519
+; CHECK-RV64-NEXT: slli a2, a1, 6
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1018
+; CHECK-RV64-NEXT: j .LBB61_520
; CHECK-RV64-NEXT: .LBB61_1018: # %cond.load2017
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17571,7 +19555,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_520
+; CHECK-RV64-NEXT: slli a2, a1, 5
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1019
+; CHECK-RV64-NEXT: j .LBB61_521
; CHECK-RV64-NEXT: .LBB61_1019: # %cond.load2021
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17582,7 +19568,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_521
+; CHECK-RV64-NEXT: slli a2, a1, 4
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1020
+; CHECK-RV64-NEXT: j .LBB61_522
; CHECK-RV64-NEXT: .LBB61_1020: # %cond.load2025
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17593,7 +19581,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_522
+; CHECK-RV64-NEXT: slli a2, a1, 3
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1021
+; CHECK-RV64-NEXT: j .LBB61_523
; CHECK-RV64-NEXT: .LBB61_1021: # %cond.load2029
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17604,7 +19594,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_523
+; CHECK-RV64-NEXT: slli a2, a1, 2
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1022
+; CHECK-RV64-NEXT: j .LBB61_524
; CHECK-RV64-NEXT: .LBB61_1022: # %cond.load2033
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
@@ -17615,7 +19607,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3
; CHECK-RV64-NEXT: addi a0, a0, 1
-; CHECK-RV64-NEXT: j .LBB61_524
+; CHECK-RV64-NEXT: slli a2, a1, 1
+; CHECK-RV64-NEXT: bltz a2, .LBB61_1023
+; CHECK-RV64-NEXT: j .LBB61_525
; CHECK-RV64-NEXT: .LBB61_1023: # %cond.load2037
; CHECK-RV64-NEXT: lbu a2, 0(a0)
; CHECK-RV64-NEXT: li a3, 512
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index ee9ec98d387c2..76eca8e034303 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -85,16 +85,17 @@ define <2 x i8> @mgather_v2i8(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru)
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB1_3
-; RV64ZVE32F-NEXT: .LBB1_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB1_4
-; RV64ZVE32F-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-NEXT: .LBB1_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB1_3: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
-; RV64ZVE32F-NEXT: j .LBB1_1
+; RV64ZVE32F-NEXT: andi a2, a2, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB1_2
; RV64ZVE32F-NEXT: .LBB1_4: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
@@ -471,7 +472,7 @@ define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i8> %passthru)
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB8_5
-; RV64ZVE32F-NEXT: .LBB8_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB8_6
; RV64ZVE32F-NEXT: .LBB8_2: # %else2
@@ -480,14 +481,15 @@ define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i8> %passthru)
; RV64ZVE32F-NEXT: .LBB8_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB8_8
-; RV64ZVE32F-NEXT: # %bb.4: # %else8
+; RV64ZVE32F-NEXT: .LBB8_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB8_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: j .LBB8_1
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB8_2
; RV64ZVE32F-NEXT: .LBB8_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
@@ -495,14 +497,16 @@ define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i8> %passthru)
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: j .LBB8_2
+; RV64ZVE32F-NEXT: andi a2, a1, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB8_3
; RV64ZVE32F-NEXT: .LBB8_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: j .LBB8_3
+; RV64ZVE32F-NEXT: andi a1, a1, 8
+; RV64ZVE32F-NEXT: beqz a1, .LBB8_4
; RV64ZVE32F-NEXT: .LBB8_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
@@ -592,7 +596,7 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru)
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB11_9
-; RV64ZVE32F-NEXT: .LBB11_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB11_10
; RV64ZVE32F-NEXT: .LBB11_2: # %else2
@@ -613,14 +617,15 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru)
; RV64ZVE32F-NEXT: .LBB11_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB11_16
-; RV64ZVE32F-NEXT: # %bb.8: # %else20
+; RV64ZVE32F-NEXT: .LBB11_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB11_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: j .LBB11_1
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB11_2
; RV64ZVE32F-NEXT: .LBB11_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
@@ -628,42 +633,48 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru)
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: j .LBB11_2
+; RV64ZVE32F-NEXT: andi a2, a1, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB11_3
; RV64ZVE32F-NEXT: .LBB11_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: j .LBB11_3
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB11_4
; RV64ZVE32F-NEXT: .LBB11_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
-; RV64ZVE32F-NEXT: j .LBB11_4
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: beqz a2, .LBB11_5
; RV64ZVE32F-NEXT: .LBB11_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
-; RV64ZVE32F-NEXT: j .LBB11_5
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB11_6
; RV64ZVE32F-NEXT: .LBB11_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
-; RV64ZVE32F-NEXT: j .LBB11_6
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: beqz a2, .LBB11_7
; RV64ZVE32F-NEXT: .LBB11_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
-; RV64ZVE32F-NEXT: j .LBB11_7
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB11_8
; RV64ZVE32F-NEXT: .LBB11_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
@@ -867,16 +878,17 @@ define <2 x i16> @mgather_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthr
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB14_3
-; RV64ZVE32F-NEXT: .LBB14_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB14_4
-; RV64ZVE32F-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-NEXT: .LBB14_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB14_3: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
-; RV64ZVE32F-NEXT: j .LBB14_1
+; RV64ZVE32F-NEXT: andi a2, a2, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB14_2
; RV64ZVE32F-NEXT: .LBB14_4: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
@@ -1149,7 +1161,7 @@ define <4 x i16> @mgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i16> %passthr
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB19_5
-; RV64ZVE32F-NEXT: .LBB19_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB19_6
; RV64ZVE32F-NEXT: .LBB19_2: # %else2
@@ -1158,14 +1170,15 @@ define <4 x i16> @mgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i16> %passthr
; RV64ZVE32F-NEXT: .LBB19_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB19_8
-; RV64ZVE32F-NEXT: # %bb.4: # %else8
+; RV64ZVE32F-NEXT: .LBB19_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB19_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: j .LBB19_1
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB19_2
; RV64ZVE32F-NEXT: .LBB19_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
@@ -1173,14 +1186,16 @@ define <4 x i16> @mgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i16> %passthr
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: j .LBB19_2
+; RV64ZVE32F-NEXT: andi a2, a1, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB19_3
; RV64ZVE32F-NEXT: .LBB19_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: j .LBB19_3
+; RV64ZVE32F-NEXT: andi a1, a1, 8
+; RV64ZVE32F-NEXT: beqz a1, .LBB19_4
; RV64ZVE32F-NEXT: .LBB19_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lh a0, 0(a0)
@@ -1270,7 +1285,7 @@ define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i16> %passthr
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB22_9
-; RV64ZVE32F-NEXT: .LBB22_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB22_10
; RV64ZVE32F-NEXT: .LBB22_2: # %else2
@@ -1291,14 +1306,15 @@ define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i16> %passthr
; RV64ZVE32F-NEXT: .LBB22_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB22_16
-; RV64ZVE32F-NEXT: # %bb.8: # %else20
+; RV64ZVE32F-NEXT: .LBB22_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB22_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: j .LBB22_1
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB22_2
; RV64ZVE32F-NEXT: .LBB22_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
@@ -1306,42 +1322,48 @@ define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i16> %passthr
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: j .LBB22_2
+; RV64ZVE32F-NEXT: andi a2, a1, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB22_3
; RV64ZVE32F-NEXT: .LBB22_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: j .LBB22_3
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB22_4
; RV64ZVE32F-NEXT: .LBB22_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
-; RV64ZVE32F-NEXT: j .LBB22_4
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: beqz a2, .LBB22_5
; RV64ZVE32F-NEXT: .LBB22_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
-; RV64ZVE32F-NEXT: j .LBB22_5
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB22_6
; RV64ZVE32F-NEXT: .LBB22_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
-; RV64ZVE32F-NEXT: j .LBB22_6
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: beqz a2, .LBB22_7
; RV64ZVE32F-NEXT: .LBB22_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
-; RV64ZVE32F-NEXT: j .LBB22_7
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB22_8
; RV64ZVE32F-NEXT: .LBB22_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: lh a0, 0(a0)
@@ -1984,16 +2006,17 @@ define <2 x i32> @mgather_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthr
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB28_3
-; RV64ZVE32F-NEXT: .LBB28_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB28_4
-; RV64ZVE32F-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-NEXT: .LBB28_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB28_3: # %cond.load
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
-; RV64ZVE32F-NEXT: j .LBB28_1
+; RV64ZVE32F-NEXT: andi a2, a2, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB28_2
; RV64ZVE32F-NEXT: .LBB28_4: # %cond.load1
; RV64ZVE32F-NEXT: lw a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -2156,7 +2179,7 @@ define <4 x i32> @mgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i32> %passthr
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB31_5
-; RV64ZVE32F-NEXT: .LBB31_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB31_6
; RV64ZVE32F-NEXT: .LBB31_2: # %else2
@@ -2165,14 +2188,15 @@ define <4 x i32> @mgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i32> %passthr
; RV64ZVE32F-NEXT: .LBB31_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB31_8
-; RV64ZVE32F-NEXT: # %bb.4: # %else8
+; RV64ZVE32F-NEXT: .LBB31_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB31_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: j .LBB31_1
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB31_2
; RV64ZVE32F-NEXT: .LBB31_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
@@ -2180,14 +2204,16 @@ define <4 x i32> @mgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i32> %passthr
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: j .LBB31_2
+; RV64ZVE32F-NEXT: andi a2, a1, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB31_3
; RV64ZVE32F-NEXT: .LBB31_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: j .LBB31_3
+; RV64ZVE32F-NEXT: andi a1, a1, 8
+; RV64ZVE32F-NEXT: beqz a1, .LBB31_4
; RV64ZVE32F-NEXT: .LBB31_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lw a0, 0(a0)
@@ -2276,7 +2302,7 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB34_9
-; RV64ZVE32F-NEXT: .LBB34_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB34_10
; RV64ZVE32F-NEXT: .LBB34_2: # %else2
@@ -2297,14 +2323,15 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr
; RV64ZVE32F-NEXT: .LBB34_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB34_16
-; RV64ZVE32F-NEXT: # %bb.8: # %else20
+; RV64ZVE32F-NEXT: .LBB34_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB34_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: j .LBB34_1
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB34_2
; RV64ZVE32F-NEXT: .LBB34_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
@@ -2312,42 +2339,48 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1
-; RV64ZVE32F-NEXT: j .LBB34_2
+; RV64ZVE32F-NEXT: andi a2, a1, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB34_3
; RV64ZVE32F-NEXT: .LBB34_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2
-; RV64ZVE32F-NEXT: j .LBB34_3
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB34_4
; RV64ZVE32F-NEXT: .LBB34_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3
-; RV64ZVE32F-NEXT: j .LBB34_4
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: beqz a2, .LBB34_5
; RV64ZVE32F-NEXT: .LBB34_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4
-; RV64ZVE32F-NEXT: j .LBB34_5
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB34_6
; RV64ZVE32F-NEXT: .LBB34_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5
-; RV64ZVE32F-NEXT: j .LBB34_6
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: beqz a2, .LBB34_7
; RV64ZVE32F-NEXT: .LBB34_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6
-; RV64ZVE32F-NEXT: j .LBB34_7
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB34_8
; RV64ZVE32F-NEXT: .LBB34_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: lw a0, 0(a0)
@@ -3287,13 +3320,13 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB41_15
-; RV64ZVE32F-NEXT: .LBB41_7: # %else8
+; RV64ZVE32F-NEXT: # %bb.7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB41_16
; RV64ZVE32F-NEXT: .LBB41_8: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB41_10
-; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
+; RV64ZVE32F-NEXT: .LBB41_9: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -3342,7 +3375,8 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 3
-; RV64ZVE32F-NEXT: j .LBB41_7
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: beqz a2, .LBB41_8
; RV64ZVE32F-NEXT: .LBB41_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -3351,7 +3385,9 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: j .LBB41_8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: bnez a2, .LBB41_9
+; RV64ZVE32F-NEXT: j .LBB41_10
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
@@ -3423,30 +3459,30 @@ define <2 x i64> @mgather_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %passthr
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a4, v0
; RV32ZVE32F-NEXT: andi a2, a4, 1
-; RV32ZVE32F-NEXT: beqz a2, .LBB43_4
+; RV32ZVE32F-NEXT: beqz a2, .LBB43_3
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a2, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
-; RV32ZVE32F-NEXT: .LBB43_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, a4, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB43_5
-; RV32ZVE32F-NEXT: # %bb.3:
+; RV32ZVE32F-NEXT: bnez a4, .LBB43_4
+; RV32ZVE32F-NEXT: .LBB43_2:
; RV32ZVE32F-NEXT: lw a4, 8(a1)
; RV32ZVE32F-NEXT: lw a1, 12(a1)
-; RV32ZVE32F-NEXT: j .LBB43_6
-; RV32ZVE32F-NEXT: .LBB43_4:
+; RV32ZVE32F-NEXT: j .LBB43_5
+; RV32ZVE32F-NEXT: .LBB43_3:
; RV32ZVE32F-NEXT: lw a2, 0(a1)
; RV32ZVE32F-NEXT: lw a3, 4(a1)
-; RV32ZVE32F-NEXT: j .LBB43_2
-; RV32ZVE32F-NEXT: .LBB43_5: # %cond.load1
+; RV32ZVE32F-NEXT: andi a4, a4, 2
+; RV32ZVE32F-NEXT: beqz a4, .LBB43_2
+; RV32ZVE32F-NEXT: .LBB43_4: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lw a4, 0(a1)
; RV32ZVE32F-NEXT: lw a1, 4(a1)
-; RV32ZVE32F-NEXT: .LBB43_6: # %else2
+; RV32ZVE32F-NEXT: .LBB43_5: # %else2
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -3496,56 +3532,56 @@ define <4 x i64> @mgather_v4i64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i64> %passthr
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a6, v0
; RV32ZVE32F-NEXT: andi a2, a6, 1
-; RV32ZVE32F-NEXT: beqz a2, .LBB44_8
+; RV32ZVE32F-NEXT: beqz a2, .LBB44_5
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a2, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
-; RV32ZVE32F-NEXT: .LBB44_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, a6, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB44_9
-; RV32ZVE32F-NEXT: # %bb.3:
+; RV32ZVE32F-NEXT: bnez a4, .LBB44_6
+; RV32ZVE32F-NEXT: .LBB44_2:
; RV32ZVE32F-NEXT: lw a4, 8(a1)
; RV32ZVE32F-NEXT: lw a5, 12(a1)
-; RV32ZVE32F-NEXT: .LBB44_4:
; RV32ZVE32F-NEXT: andi a7, a6, 4
-; RV32ZVE32F-NEXT: bnez a7, .LBB44_10
-; RV32ZVE32F-NEXT: # %bb.5:
+; RV32ZVE32F-NEXT: bnez a7, .LBB44_7
+; RV32ZVE32F-NEXT: .LBB44_3:
; RV32ZVE32F-NEXT: lw a7, 16(a1)
; RV32ZVE32F-NEXT: lw t0, 20(a1)
-; RV32ZVE32F-NEXT: .LBB44_6:
; RV32ZVE32F-NEXT: andi a6, a6, 8
-; RV32ZVE32F-NEXT: bnez a6, .LBB44_11
-; RV32ZVE32F-NEXT: # %bb.7:
+; RV32ZVE32F-NEXT: bnez a6, .LBB44_8
+; RV32ZVE32F-NEXT: .LBB44_4:
; RV32ZVE32F-NEXT: lw a6, 24(a1)
; RV32ZVE32F-NEXT: lw a1, 28(a1)
-; RV32ZVE32F-NEXT: j .LBB44_12
-; RV32ZVE32F-NEXT: .LBB44_8:
+; RV32ZVE32F-NEXT: j .LBB44_9
+; RV32ZVE32F-NEXT: .LBB44_5:
; RV32ZVE32F-NEXT: lw a2, 0(a1)
; RV32ZVE32F-NEXT: lw a3, 4(a1)
-; RV32ZVE32F-NEXT: j .LBB44_2
-; RV32ZVE32F-NEXT: .LBB44_9: # %cond.load1
+; RV32ZVE32F-NEXT: andi a4, a6, 2
+; RV32ZVE32F-NEXT: beqz a4, .LBB44_2
+; RV32ZVE32F-NEXT: .LBB44_6: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v9
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: j .LBB44_4
-; RV32ZVE32F-NEXT: .LBB44_10: # %cond.load4
+; RV32ZVE32F-NEXT: andi a7, a6, 4
+; RV32ZVE32F-NEXT: beqz a7, .LBB44_3
+; RV32ZVE32F-NEXT: .LBB44_7: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s t0, v9
; RV32ZVE32F-NEXT: lw a7, 0(t0)
; RV32ZVE32F-NEXT: lw t0, 4(t0)
-; RV32ZVE32F-NEXT: j .LBB44_6
-; RV32ZVE32F-NEXT: .LBB44_11: # %cond.load7
+; RV32ZVE32F-NEXT: andi a6, a6, 8
+; RV32ZVE32F-NEXT: beqz a6, .LBB44_4
+; RV32ZVE32F-NEXT: .LBB44_8: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lw a6, 0(a1)
; RV32ZVE32F-NEXT: lw a1, 4(a1)
-; RV32ZVE32F-NEXT: .LBB44_12: # %else8
+; RV32ZVE32F-NEXT: .LBB44_9: # %else8
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -3561,41 +3597,41 @@ define <4 x i64> @mgather_v4i64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i64> %passthr
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB44_8
+; RV64ZVE32F-NEXT: beqz a3, .LBB44_5
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: ld a3, 0(a3)
-; RV64ZVE32F-NEXT: .LBB44_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB44_9
-; RV64ZVE32F-NEXT: # %bb.3:
+; RV64ZVE32F-NEXT: bnez a4, .LBB44_6
+; RV64ZVE32F-NEXT: .LBB44_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: .LBB44_4:
; RV64ZVE32F-NEXT: andi a6, a5, 4
-; RV64ZVE32F-NEXT: bnez a6, .LBB44_10
-; RV64ZVE32F-NEXT: # %bb.5:
+; RV64ZVE32F-NEXT: bnez a6, .LBB44_7
+; RV64ZVE32F-NEXT: .LBB44_3:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: .LBB44_6:
; RV64ZVE32F-NEXT: andi a5, a5, 8
-; RV64ZVE32F-NEXT: bnez a5, .LBB44_11
-; RV64ZVE32F-NEXT: # %bb.7:
+; RV64ZVE32F-NEXT: bnez a5, .LBB44_8
+; RV64ZVE32F-NEXT: .LBB44_4:
; RV64ZVE32F-NEXT: ld a1, 24(a2)
-; RV64ZVE32F-NEXT: j .LBB44_12
-; RV64ZVE32F-NEXT: .LBB44_8:
+; RV64ZVE32F-NEXT: j .LBB44_9
+; RV64ZVE32F-NEXT: .LBB44_5:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB44_2
-; RV64ZVE32F-NEXT: .LBB44_9: # %cond.load1
+; RV64ZVE32F-NEXT: andi a4, a5, 2
+; RV64ZVE32F-NEXT: beqz a4, .LBB44_2
+; RV64ZVE32F-NEXT: .LBB44_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a4, 8(a1)
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: j .LBB44_4
-; RV64ZVE32F-NEXT: .LBB44_10: # %cond.load4
+; RV64ZVE32F-NEXT: andi a6, a5, 4
+; RV64ZVE32F-NEXT: beqz a6, .LBB44_3
+; RV64ZVE32F-NEXT: .LBB44_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a6, 16(a1)
; RV64ZVE32F-NEXT: ld a6, 0(a6)
-; RV64ZVE32F-NEXT: j .LBB44_6
-; RV64ZVE32F-NEXT: .LBB44_11: # %cond.load7
+; RV64ZVE32F-NEXT: andi a5, a5, 8
+; RV64ZVE32F-NEXT: beqz a5, .LBB44_4
+; RV64ZVE32F-NEXT: .LBB44_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a1, 24(a1)
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB44_12: # %else8
+; RV64ZVE32F-NEXT: .LBB44_9: # %else8
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -3736,82 +3772,82 @@ define <8 x i64> @mgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i64> %passthr
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a2, t0, 1
-; RV32ZVE32F-NEXT: beqz a2, .LBB47_12
+; RV32ZVE32F-NEXT: beqz a2, .LBB47_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a2, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
-; RV32ZVE32F-NEXT: .LBB47_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB47_13
-; RV32ZVE32F-NEXT: # %bb.3:
+; RV32ZVE32F-NEXT: bnez a4, .LBB47_8
+; RV32ZVE32F-NEXT: .LBB47_2:
; RV32ZVE32F-NEXT: lw a4, 8(a1)
; RV32ZVE32F-NEXT: lw a5, 12(a1)
-; RV32ZVE32F-NEXT: .LBB47_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB47_14
-; RV32ZVE32F-NEXT: # %bb.5:
+; RV32ZVE32F-NEXT: bnez a6, .LBB47_9
+; RV32ZVE32F-NEXT: .LBB47_3:
; RV32ZVE32F-NEXT: lw a6, 16(a1)
; RV32ZVE32F-NEXT: lw a7, 20(a1)
-; RV32ZVE32F-NEXT: .LBB47_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB47_15
-; RV32ZVE32F-NEXT: # %bb.7:
+; RV32ZVE32F-NEXT: bnez t1, .LBB47_10
+; RV32ZVE32F-NEXT: .LBB47_4:
; RV32ZVE32F-NEXT: lw t1, 24(a1)
; RV32ZVE32F-NEXT: lw t2, 28(a1)
-; RV32ZVE32F-NEXT: .LBB47_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB47_16
-; RV32ZVE32F-NEXT: # %bb.9:
+; RV32ZVE32F-NEXT: bnez t3, .LBB47_11
+; RV32ZVE32F-NEXT: .LBB47_5:
; RV32ZVE32F-NEXT: lw t3, 32(a1)
; RV32ZVE32F-NEXT: lw t4, 36(a1)
-; RV32ZVE32F-NEXT: .LBB47_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB47_17
-; RV32ZVE32F-NEXT: # %bb.11:
+; RV32ZVE32F-NEXT: bnez t5, .LBB47_12
+; RV32ZVE32F-NEXT: .LBB47_6:
; RV32ZVE32F-NEXT: lw t5, 40(a1)
; RV32ZVE32F-NEXT: lw t6, 44(a1)
-; RV32ZVE32F-NEXT: j .LBB47_18
-; RV32ZVE32F-NEXT: .LBB47_12:
+; RV32ZVE32F-NEXT: j .LBB47_13
+; RV32ZVE32F-NEXT: .LBB47_7:
; RV32ZVE32F-NEXT: lw a2, 0(a1)
; RV32ZVE32F-NEXT: lw a3, 4(a1)
-; RV32ZVE32F-NEXT: j .LBB47_2
-; RV32ZVE32F-NEXT: .LBB47_13: # %cond.load1
+; RV32ZVE32F-NEXT: andi a4, t0, 2
+; RV32ZVE32F-NEXT: beqz a4, .LBB47_2
+; RV32ZVE32F-NEXT: .LBB47_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: j .LBB47_4
-; RV32ZVE32F-NEXT: .LBB47_14: # %cond.load4
+; RV32ZVE32F-NEXT: andi a6, t0, 4
+; RV32ZVE32F-NEXT: beqz a6, .LBB47_3
+; RV32ZVE32F-NEXT: .LBB47_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: j .LBB47_6
-; RV32ZVE32F-NEXT: .LBB47_15: # %cond.load7
+; RV32ZVE32F-NEXT: andi t1, t0, 8
+; RV32ZVE32F-NEXT: beqz t1, .LBB47_4
+; RV32ZVE32F-NEXT: .LBB47_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: j .LBB47_8
-; RV32ZVE32F-NEXT: .LBB47_16: # %cond.load10
+; RV32ZVE32F-NEXT: andi t3, t0, 16
+; RV32ZVE32F-NEXT: beqz t3, .LBB47_5
+; RV32ZVE32F-NEXT: .LBB47_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: j .LBB47_10
-; RV32ZVE32F-NEXT: .LBB47_17: # %cond.load13
+; RV32ZVE32F-NEXT: andi t5, t0, 32
+; RV32ZVE32F-NEXT: beqz t5, .LBB47_6
+; RV32ZVE32F-NEXT: .LBB47_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB47_18: # %else14
+; RV32ZVE32F-NEXT: .LBB47_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -3819,31 +3855,31 @@ define <8 x i64> @mgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i64> %passthr
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB47_22
-; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB47_16
+; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
-; RV32ZVE32F-NEXT: .LBB47_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB47_23
-; RV32ZVE32F-NEXT: # %bb.21:
+; RV32ZVE32F-NEXT: bnez t0, .LBB47_17
+; RV32ZVE32F-NEXT: .LBB47_15:
; RV32ZVE32F-NEXT: lw t0, 56(a1)
; RV32ZVE32F-NEXT: lw a1, 60(a1)
-; RV32ZVE32F-NEXT: j .LBB47_24
-; RV32ZVE32F-NEXT: .LBB47_22:
+; RV32ZVE32F-NEXT: j .LBB47_18
+; RV32ZVE32F-NEXT: .LBB47_16:
; RV32ZVE32F-NEXT: lw s0, 48(a1)
; RV32ZVE32F-NEXT: lw s1, 52(a1)
-; RV32ZVE32F-NEXT: j .LBB47_20
-; RV32ZVE32F-NEXT: .LBB47_23: # %cond.load19
+; RV32ZVE32F-NEXT: andi t0, t0, -128
+; RV32ZVE32F-NEXT: beqz t0, .LBB47_15
+; RV32ZVE32F-NEXT: .LBB47_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lw t0, 0(a1)
; RV32ZVE32F-NEXT: lw a1, 4(a1)
-; RV32ZVE32F-NEXT: .LBB47_24: # %else20
+; RV32ZVE32F-NEXT: .LBB47_18: # %else20
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -3873,77 +3909,77 @@ define <8 x i64> @mgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i64> %passthr
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a6, v0
; RV64ZVE32F-NEXT: andi a3, a6, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB47_16
+; RV64ZVE32F-NEXT: beqz a3, .LBB47_9
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: ld a3, 0(a3)
-; RV64ZVE32F-NEXT: .LBB47_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a6, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB47_17
-; RV64ZVE32F-NEXT: # %bb.3:
+; RV64ZVE32F-NEXT: bnez a4, .LBB47_10
+; RV64ZVE32F-NEXT: .LBB47_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: .LBB47_4:
; RV64ZVE32F-NEXT: andi a5, a6, 4
-; RV64ZVE32F-NEXT: bnez a5, .LBB47_18
-; RV64ZVE32F-NEXT: # %bb.5:
+; RV64ZVE32F-NEXT: bnez a5, .LBB47_11
+; RV64ZVE32F-NEXT: .LBB47_3:
; RV64ZVE32F-NEXT: ld a5, 16(a2)
-; RV64ZVE32F-NEXT: .LBB47_6:
; RV64ZVE32F-NEXT: andi a7, a6, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB47_19
-; RV64ZVE32F-NEXT: # %bb.7:
+; RV64ZVE32F-NEXT: bnez a7, .LBB47_12
+; RV64ZVE32F-NEXT: .LBB47_4:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
-; RV64ZVE32F-NEXT: .LBB47_8:
; RV64ZVE32F-NEXT: andi t0, a6, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB47_20
-; RV64ZVE32F-NEXT: # %bb.9:
+; RV64ZVE32F-NEXT: bnez t0, .LBB47_13
+; RV64ZVE32F-NEXT: .LBB47_5:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: .LBB47_10:
; RV64ZVE32F-NEXT: andi t1, a6, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB47_21
-; RV64ZVE32F-NEXT: # %bb.11:
+; RV64ZVE32F-NEXT: bnez t1, .LBB47_14
+; RV64ZVE32F-NEXT: .LBB47_6:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: .LBB47_12:
; RV64ZVE32F-NEXT: andi t2, a6, 64
-; RV64ZVE32F-NEXT: bnez t2, .LBB47_22
-; RV64ZVE32F-NEXT: # %bb.13:
+; RV64ZVE32F-NEXT: bnez t2, .LBB47_15
+; RV64ZVE32F-NEXT: .LBB47_7:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
-; RV64ZVE32F-NEXT: .LBB47_14:
; RV64ZVE32F-NEXT: andi a6, a6, -128
-; RV64ZVE32F-NEXT: bnez a6, .LBB47_23
-; RV64ZVE32F-NEXT: # %bb.15:
+; RV64ZVE32F-NEXT: bnez a6, .LBB47_16
+; RV64ZVE32F-NEXT: .LBB47_8:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB47_24
-; RV64ZVE32F-NEXT: .LBB47_16:
+; RV64ZVE32F-NEXT: j .LBB47_17
+; RV64ZVE32F-NEXT: .LBB47_9:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB47_2
-; RV64ZVE32F-NEXT: .LBB47_17: # %cond.load1
+; RV64ZVE32F-NEXT: andi a4, a6, 2
+; RV64ZVE32F-NEXT: beqz a4, .LBB47_2
+; RV64ZVE32F-NEXT: .LBB47_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a4, 8(a1)
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: j .LBB47_4
-; RV64ZVE32F-NEXT: .LBB47_18: # %cond.load4
+; RV64ZVE32F-NEXT: andi a5, a6, 4
+; RV64ZVE32F-NEXT: beqz a5, .LBB47_3
+; RV64ZVE32F-NEXT: .LBB47_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a5, 16(a1)
; RV64ZVE32F-NEXT: ld a5, 0(a5)
-; RV64ZVE32F-NEXT: j .LBB47_6
-; RV64ZVE32F-NEXT: .LBB47_19: # %cond.load7
+; RV64ZVE32F-NEXT: andi a7, a6, 8
+; RV64ZVE32F-NEXT: beqz a7, .LBB47_4
+; RV64ZVE32F-NEXT: .LBB47_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a7, 24(a1)
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: j .LBB47_8
-; RV64ZVE32F-NEXT: .LBB47_20: # %cond.load10
+; RV64ZVE32F-NEXT: andi t0, a6, 16
+; RV64ZVE32F-NEXT: beqz t0, .LBB47_5
+; RV64ZVE32F-NEXT: .LBB47_13: # %cond.load10
; RV64ZVE32F-NEXT: ld t0, 32(a1)
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: j .LBB47_10
-; RV64ZVE32F-NEXT: .LBB47_21: # %cond.load13
+; RV64ZVE32F-NEXT: andi t1, a6, 32
+; RV64ZVE32F-NEXT: beqz t1, .LBB47_6
+; RV64ZVE32F-NEXT: .LBB47_14: # %cond.load13
; RV64ZVE32F-NEXT: ld t1, 40(a1)
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: j .LBB47_12
-; RV64ZVE32F-NEXT: .LBB47_22: # %cond.load16
+; RV64ZVE32F-NEXT: andi t2, a6, 64
+; RV64ZVE32F-NEXT: beqz t2, .LBB47_7
+; RV64ZVE32F-NEXT: .LBB47_15: # %cond.load16
; RV64ZVE32F-NEXT: ld t2, 48(a1)
; RV64ZVE32F-NEXT: ld t2, 0(t2)
-; RV64ZVE32F-NEXT: j .LBB47_14
-; RV64ZVE32F-NEXT: .LBB47_23: # %cond.load19
+; RV64ZVE32F-NEXT: andi a6, a6, -128
+; RV64ZVE32F-NEXT: beqz a6, .LBB47_8
+; RV64ZVE32F-NEXT: .LBB47_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a1, 56(a1)
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB47_24: # %else20
+; RV64ZVE32F-NEXT: .LBB47_17: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a5, 16(a0)
@@ -3987,81 +4023,81 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: andi a3, t0, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: beqz a3, .LBB48_12
+; RV32ZVE32F-NEXT: beqz a3, .LBB48_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
-; RV32ZVE32F-NEXT: .LBB48_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB48_13
-; RV32ZVE32F-NEXT: # %bb.3:
+; RV32ZVE32F-NEXT: bnez a4, .LBB48_8
+; RV32ZVE32F-NEXT: .LBB48_2:
; RV32ZVE32F-NEXT: lw a4, 8(a2)
; RV32ZVE32F-NEXT: lw a5, 12(a2)
-; RV32ZVE32F-NEXT: .LBB48_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB48_14
-; RV32ZVE32F-NEXT: # %bb.5:
+; RV32ZVE32F-NEXT: bnez a6, .LBB48_9
+; RV32ZVE32F-NEXT: .LBB48_3:
; RV32ZVE32F-NEXT: lw a6, 16(a2)
; RV32ZVE32F-NEXT: lw a7, 20(a2)
-; RV32ZVE32F-NEXT: .LBB48_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB48_15
-; RV32ZVE32F-NEXT: # %bb.7:
+; RV32ZVE32F-NEXT: bnez t1, .LBB48_10
+; RV32ZVE32F-NEXT: .LBB48_4:
; RV32ZVE32F-NEXT: lw t1, 24(a2)
; RV32ZVE32F-NEXT: lw t2, 28(a2)
-; RV32ZVE32F-NEXT: .LBB48_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB48_16
-; RV32ZVE32F-NEXT: # %bb.9:
+; RV32ZVE32F-NEXT: bnez t3, .LBB48_11
+; RV32ZVE32F-NEXT: .LBB48_5:
; RV32ZVE32F-NEXT: lw t3, 32(a2)
; RV32ZVE32F-NEXT: lw t4, 36(a2)
-; RV32ZVE32F-NEXT: .LBB48_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB48_17
-; RV32ZVE32F-NEXT: # %bb.11:
+; RV32ZVE32F-NEXT: bnez t5, .LBB48_12
+; RV32ZVE32F-NEXT: .LBB48_6:
; RV32ZVE32F-NEXT: lw t5, 40(a2)
; RV32ZVE32F-NEXT: lw t6, 44(a2)
-; RV32ZVE32F-NEXT: j .LBB48_18
-; RV32ZVE32F-NEXT: .LBB48_12:
+; RV32ZVE32F-NEXT: j .LBB48_13
+; RV32ZVE32F-NEXT: .LBB48_7:
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
-; RV32ZVE32F-NEXT: j .LBB48_2
-; RV32ZVE32F-NEXT: .LBB48_13: # %cond.load1
+; RV32ZVE32F-NEXT: andi a4, t0, 2
+; RV32ZVE32F-NEXT: beqz a4, .LBB48_2
+; RV32ZVE32F-NEXT: .LBB48_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: j .LBB48_4
-; RV32ZVE32F-NEXT: .LBB48_14: # %cond.load4
+; RV32ZVE32F-NEXT: andi a6, t0, 4
+; RV32ZVE32F-NEXT: beqz a6, .LBB48_3
+; RV32ZVE32F-NEXT: .LBB48_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: j .LBB48_6
-; RV32ZVE32F-NEXT: .LBB48_15: # %cond.load7
+; RV32ZVE32F-NEXT: andi t1, t0, 8
+; RV32ZVE32F-NEXT: beqz t1, .LBB48_4
+; RV32ZVE32F-NEXT: .LBB48_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: j .LBB48_8
-; RV32ZVE32F-NEXT: .LBB48_16: # %cond.load10
+; RV32ZVE32F-NEXT: andi t3, t0, 16
+; RV32ZVE32F-NEXT: beqz t3, .LBB48_5
+; RV32ZVE32F-NEXT: .LBB48_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: j .LBB48_10
-; RV32ZVE32F-NEXT: .LBB48_17: # %cond.load13
+; RV32ZVE32F-NEXT: andi t5, t0, 32
+; RV32ZVE32F-NEXT: beqz t5, .LBB48_6
+; RV32ZVE32F-NEXT: .LBB48_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB48_18: # %else14
+; RV32ZVE32F-NEXT: .LBB48_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -4069,31 +4105,31 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB48_22
-; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB48_16
+; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
-; RV32ZVE32F-NEXT: .LBB48_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB48_23
-; RV32ZVE32F-NEXT: # %bb.21:
+; RV32ZVE32F-NEXT: bnez t0, .LBB48_17
+; RV32ZVE32F-NEXT: .LBB48_15:
; RV32ZVE32F-NEXT: lw t0, 56(a2)
; RV32ZVE32F-NEXT: lw a2, 60(a2)
-; RV32ZVE32F-NEXT: j .LBB48_24
-; RV32ZVE32F-NEXT: .LBB48_22:
+; RV32ZVE32F-NEXT: j .LBB48_18
+; RV32ZVE32F-NEXT: .LBB48_16:
; RV32ZVE32F-NEXT: lw s0, 48(a2)
; RV32ZVE32F-NEXT: lw s1, 52(a2)
-; RV32ZVE32F-NEXT: j .LBB48_20
-; RV32ZVE32F-NEXT: .LBB48_23: # %cond.load19
+; RV32ZVE32F-NEXT: andi t0, t0, -128
+; RV32ZVE32F-NEXT: beqz t0, .LBB48_15
+; RV32ZVE32F-NEXT: .LBB48_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: .LBB48_24: # %else20
+; RV32ZVE32F-NEXT: .LBB48_18: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -4123,105 +4159,105 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB48_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB48_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
-; RV64ZVE32F-NEXT: .LBB48_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB48_5
-; RV64ZVE32F-NEXT: # %bb.3:
+; RV64ZVE32F-NEXT: bnez a4, .LBB48_4
+; RV64ZVE32F-NEXT: .LBB48_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: j .LBB48_6
-; RV64ZVE32F-NEXT: .LBB48_4:
+; RV64ZVE32F-NEXT: j .LBB48_5
+; RV64ZVE32F-NEXT: .LBB48_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB48_2
-; RV64ZVE32F-NEXT: .LBB48_5: # %cond.load1
+; RV64ZVE32F-NEXT: andi a4, a5, 2
+; RV64ZVE32F-NEXT: beqz a4, .LBB48_2
+; RV64ZVE32F-NEXT: .LBB48_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: .LBB48_6: # %else2
+; RV64ZVE32F-NEXT: .LBB48_5: # %else2
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB48_10
-; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a6, .LBB48_8
+; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v9
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
-; RV64ZVE32F-NEXT: .LBB48_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB48_11
-; RV64ZVE32F-NEXT: # %bb.9:
+; RV64ZVE32F-NEXT: bnez a7, .LBB48_9
+; RV64ZVE32F-NEXT: .LBB48_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
-; RV64ZVE32F-NEXT: j .LBB48_12
-; RV64ZVE32F-NEXT: .LBB48_10:
+; RV64ZVE32F-NEXT: j .LBB48_10
+; RV64ZVE32F-NEXT: .LBB48_8:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: j .LBB48_8
-; RV64ZVE32F-NEXT: .LBB48_11: # %cond.load7
+; RV64ZVE32F-NEXT: andi a7, a5, 8
+; RV64ZVE32F-NEXT: beqz a7, .LBB48_7
+; RV64ZVE32F-NEXT: .LBB48_9: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v9
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: .LBB48_12: # %else8
+; RV64ZVE32F-NEXT: .LBB48_10: # %else8
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
-; RV64ZVE32F-NEXT: beqz t0, .LBB48_16
-; RV64ZVE32F-NEXT: # %bb.13: # %cond.load10
+; RV64ZVE32F-NEXT: beqz t0, .LBB48_13
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: .LBB48_14: # %cond.load10
; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB48_17
-; RV64ZVE32F-NEXT: # %bb.15:
+; RV64ZVE32F-NEXT: bnez t1, .LBB48_14
+; RV64ZVE32F-NEXT: .LBB48_12:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB48_18
-; RV64ZVE32F-NEXT: .LBB48_16:
+; RV64ZVE32F-NEXT: j .LBB48_15
+; RV64ZVE32F-NEXT: .LBB48_13:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: j .LBB48_14
-; RV64ZVE32F-NEXT: .LBB48_17: # %cond.load13
+; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: beqz t1, .LBB48_12
+; RV64ZVE32F-NEXT: .LBB48_14: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: .LBB48_18: # %else14
+; RV64ZVE32F-NEXT: .LBB48_15: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz t2, .LBB48_22
-; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV64ZVE32F-NEXT: beqz t2, .LBB48_18
+; RV64ZVE32F-NEXT: # %bb.16: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
-; RV64ZVE32F-NEXT: .LBB48_20: # %cond.load16
; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB48_23
-; RV64ZVE32F-NEXT: # %bb.21:
+; RV64ZVE32F-NEXT: bnez a5, .LBB48_19
+; RV64ZVE32F-NEXT: .LBB48_17:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB48_24
-; RV64ZVE32F-NEXT: .LBB48_22:
-; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: j .LBB48_20
-; RV64ZVE32F-NEXT: .LBB48_23: # %cond.load19
+; RV64ZVE32F-NEXT: .LBB48_18:
+; RV64ZVE32F-NEXT: ld t2, 48(a2)
+; RV64ZVE32F-NEXT: andi a5, a5, -128
+; RV64ZVE32F-NEXT: beqz a5, .LBB48_17
+; RV64ZVE32F-NEXT: .LBB48_19: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB48_24: # %else20
+; RV64ZVE32F-NEXT: .LBB48_20: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -4266,81 +4302,81 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: andi a3, t0, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: beqz a3, .LBB49_12
+; RV32ZVE32F-NEXT: beqz a3, .LBB49_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
-; RV32ZVE32F-NEXT: .LBB49_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB49_13
-; RV32ZVE32F-NEXT: # %bb.3:
+; RV32ZVE32F-NEXT: bnez a4, .LBB49_8
+; RV32ZVE32F-NEXT: .LBB49_2:
; RV32ZVE32F-NEXT: lw a4, 8(a2)
; RV32ZVE32F-NEXT: lw a5, 12(a2)
-; RV32ZVE32F-NEXT: .LBB49_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB49_14
-; RV32ZVE32F-NEXT: # %bb.5:
+; RV32ZVE32F-NEXT: bnez a6, .LBB49_9
+; RV32ZVE32F-NEXT: .LBB49_3:
; RV32ZVE32F-NEXT: lw a6, 16(a2)
; RV32ZVE32F-NEXT: lw a7, 20(a2)
-; RV32ZVE32F-NEXT: .LBB49_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB49_15
-; RV32ZVE32F-NEXT: # %bb.7:
+; RV32ZVE32F-NEXT: bnez t1, .LBB49_10
+; RV32ZVE32F-NEXT: .LBB49_4:
; RV32ZVE32F-NEXT: lw t1, 24(a2)
; RV32ZVE32F-NEXT: lw t2, 28(a2)
-; RV32ZVE32F-NEXT: .LBB49_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB49_16
-; RV32ZVE32F-NEXT: # %bb.9:
+; RV32ZVE32F-NEXT: bnez t3, .LBB49_11
+; RV32ZVE32F-NEXT: .LBB49_5:
; RV32ZVE32F-NEXT: lw t3, 32(a2)
; RV32ZVE32F-NEXT: lw t4, 36(a2)
-; RV32ZVE32F-NEXT: .LBB49_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB49_17
-; RV32ZVE32F-NEXT: # %bb.11:
+; RV32ZVE32F-NEXT: bnez t5, .LBB49_12
+; RV32ZVE32F-NEXT: .LBB49_6:
; RV32ZVE32F-NEXT: lw t5, 40(a2)
; RV32ZVE32F-NEXT: lw t6, 44(a2)
-; RV32ZVE32F-NEXT: j .LBB49_18
-; RV32ZVE32F-NEXT: .LBB49_12:
+; RV32ZVE32F-NEXT: j .LBB49_13
+; RV32ZVE32F-NEXT: .LBB49_7:
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
-; RV32ZVE32F-NEXT: j .LBB49_2
-; RV32ZVE32F-NEXT: .LBB49_13: # %cond.load1
+; RV32ZVE32F-NEXT: andi a4, t0, 2
+; RV32ZVE32F-NEXT: beqz a4, .LBB49_2
+; RV32ZVE32F-NEXT: .LBB49_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: j .LBB49_4
-; RV32ZVE32F-NEXT: .LBB49_14: # %cond.load4
+; RV32ZVE32F-NEXT: andi a6, t0, 4
+; RV32ZVE32F-NEXT: beqz a6, .LBB49_3
+; RV32ZVE32F-NEXT: .LBB49_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: j .LBB49_6
-; RV32ZVE32F-NEXT: .LBB49_15: # %cond.load7
+; RV32ZVE32F-NEXT: andi t1, t0, 8
+; RV32ZVE32F-NEXT: beqz t1, .LBB49_4
+; RV32ZVE32F-NEXT: .LBB49_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: j .LBB49_8
-; RV32ZVE32F-NEXT: .LBB49_16: # %cond.load10
+; RV32ZVE32F-NEXT: andi t3, t0, 16
+; RV32ZVE32F-NEXT: beqz t3, .LBB49_5
+; RV32ZVE32F-NEXT: .LBB49_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: j .LBB49_10
-; RV32ZVE32F-NEXT: .LBB49_17: # %cond.load13
+; RV32ZVE32F-NEXT: andi t5, t0, 32
+; RV32ZVE32F-NEXT: beqz t5, .LBB49_6
+; RV32ZVE32F-NEXT: .LBB49_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB49_18: # %else14
+; RV32ZVE32F-NEXT: .LBB49_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -4348,31 +4384,31 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB49_22
-; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB49_16
+; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
-; RV32ZVE32F-NEXT: .LBB49_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB49_23
-; RV32ZVE32F-NEXT: # %bb.21:
+; RV32ZVE32F-NEXT: bnez t0, .LBB49_17
+; RV32ZVE32F-NEXT: .LBB49_15:
; RV32ZVE32F-NEXT: lw t0, 56(a2)
; RV32ZVE32F-NEXT: lw a2, 60(a2)
-; RV32ZVE32F-NEXT: j .LBB49_24
-; RV32ZVE32F-NEXT: .LBB49_22:
+; RV32ZVE32F-NEXT: j .LBB49_18
+; RV32ZVE32F-NEXT: .LBB49_16:
; RV32ZVE32F-NEXT: lw s0, 48(a2)
; RV32ZVE32F-NEXT: lw s1, 52(a2)
-; RV32ZVE32F-NEXT: j .LBB49_20
-; RV32ZVE32F-NEXT: .LBB49_23: # %cond.load19
+; RV32ZVE32F-NEXT: andi t0, t0, -128
+; RV32ZVE32F-NEXT: beqz t0, .LBB49_15
+; RV32ZVE32F-NEXT: .LBB49_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: .LBB49_24: # %else20
+; RV32ZVE32F-NEXT: .LBB49_18: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -4402,105 +4438,105 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB49_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB49_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
-; RV64ZVE32F-NEXT: .LBB49_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB49_5
-; RV64ZVE32F-NEXT: # %bb.3:
+; RV64ZVE32F-NEXT: bnez a4, .LBB49_4
+; RV64ZVE32F-NEXT: .LBB49_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: j .LBB49_6
-; RV64ZVE32F-NEXT: .LBB49_4:
+; RV64ZVE32F-NEXT: j .LBB49_5
+; RV64ZVE32F-NEXT: .LBB49_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB49_2
-; RV64ZVE32F-NEXT: .LBB49_5: # %cond.load1
+; RV64ZVE32F-NEXT: andi a4, a5, 2
+; RV64ZVE32F-NEXT: beqz a4, .LBB49_2
+; RV64ZVE32F-NEXT: .LBB49_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: .LBB49_6: # %else2
+; RV64ZVE32F-NEXT: .LBB49_5: # %else2
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB49_10
-; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a6, .LBB49_8
+; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v9
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
-; RV64ZVE32F-NEXT: .LBB49_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB49_11
-; RV64ZVE32F-NEXT: # %bb.9:
+; RV64ZVE32F-NEXT: bnez a7, .LBB49_9
+; RV64ZVE32F-NEXT: .LBB49_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
-; RV64ZVE32F-NEXT: j .LBB49_12
-; RV64ZVE32F-NEXT: .LBB49_10:
+; RV64ZVE32F-NEXT: j .LBB49_10
+; RV64ZVE32F-NEXT: .LBB49_8:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: j .LBB49_8
-; RV64ZVE32F-NEXT: .LBB49_11: # %cond.load7
+; RV64ZVE32F-NEXT: andi a7, a5, 8
+; RV64ZVE32F-NEXT: beqz a7, .LBB49_7
+; RV64ZVE32F-NEXT: .LBB49_9: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v9
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: .LBB49_12: # %else8
+; RV64ZVE32F-NEXT: .LBB49_10: # %else8
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
-; RV64ZVE32F-NEXT: beqz t0, .LBB49_16
-; RV64ZVE32F-NEXT: # %bb.13: # %cond.load10
+; RV64ZVE32F-NEXT: beqz t0, .LBB49_13
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: .LBB49_14: # %cond.load10
; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB49_17
-; RV64ZVE32F-NEXT: # %bb.15:
+; RV64ZVE32F-NEXT: bnez t1, .LBB49_14
+; RV64ZVE32F-NEXT: .LBB49_12:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB49_18
-; RV64ZVE32F-NEXT: .LBB49_16:
+; RV64ZVE32F-NEXT: j .LBB49_15
+; RV64ZVE32F-NEXT: .LBB49_13:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: j .LBB49_14
-; RV64ZVE32F-NEXT: .LBB49_17: # %cond.load13
+; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: beqz t1, .LBB49_12
+; RV64ZVE32F-NEXT: .LBB49_14: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: .LBB49_18: # %else14
+; RV64ZVE32F-NEXT: .LBB49_15: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz t2, .LBB49_22
-; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV64ZVE32F-NEXT: beqz t2, .LBB49_18
+; RV64ZVE32F-NEXT: # %bb.16: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
-; RV64ZVE32F-NEXT: .LBB49_20: # %cond.load16
; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB49_23
-; RV64ZVE32F-NEXT: # %bb.21:
+; RV64ZVE32F-NEXT: bnez a5, .LBB49_19
+; RV64ZVE32F-NEXT: .LBB49_17:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB49_24
-; RV64ZVE32F-NEXT: .LBB49_22:
-; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: j .LBB49_20
-; RV64ZVE32F-NEXT: .LBB49_23: # %cond.load19
+; RV64ZVE32F-NEXT: .LBB49_18:
+; RV64ZVE32F-NEXT: ld t2, 48(a2)
+; RV64ZVE32F-NEXT: andi a5, a5, -128
+; RV64ZVE32F-NEXT: beqz a5, .LBB49_17
+; RV64ZVE32F-NEXT: .LBB49_19: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB49_24: # %else20
+; RV64ZVE32F-NEXT: .LBB49_20: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -4547,81 +4583,81 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: andi a3, t0, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: beqz a3, .LBB50_12
+; RV32ZVE32F-NEXT: beqz a3, .LBB50_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
-; RV32ZVE32F-NEXT: .LBB50_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB50_13
-; RV32ZVE32F-NEXT: # %bb.3:
+; RV32ZVE32F-NEXT: bnez a4, .LBB50_8
+; RV32ZVE32F-NEXT: .LBB50_2:
; RV32ZVE32F-NEXT: lw a4, 8(a2)
; RV32ZVE32F-NEXT: lw a5, 12(a2)
-; RV32ZVE32F-NEXT: .LBB50_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB50_14
-; RV32ZVE32F-NEXT: # %bb.5:
+; RV32ZVE32F-NEXT: bnez a6, .LBB50_9
+; RV32ZVE32F-NEXT: .LBB50_3:
; RV32ZVE32F-NEXT: lw a6, 16(a2)
; RV32ZVE32F-NEXT: lw a7, 20(a2)
-; RV32ZVE32F-NEXT: .LBB50_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB50_15
-; RV32ZVE32F-NEXT: # %bb.7:
+; RV32ZVE32F-NEXT: bnez t1, .LBB50_10
+; RV32ZVE32F-NEXT: .LBB50_4:
; RV32ZVE32F-NEXT: lw t1, 24(a2)
; RV32ZVE32F-NEXT: lw t2, 28(a2)
-; RV32ZVE32F-NEXT: .LBB50_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB50_16
-; RV32ZVE32F-NEXT: # %bb.9:
+; RV32ZVE32F-NEXT: bnez t3, .LBB50_11
+; RV32ZVE32F-NEXT: .LBB50_5:
; RV32ZVE32F-NEXT: lw t3, 32(a2)
; RV32ZVE32F-NEXT: lw t4, 36(a2)
-; RV32ZVE32F-NEXT: .LBB50_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB50_17
-; RV32ZVE32F-NEXT: # %bb.11:
+; RV32ZVE32F-NEXT: bnez t5, .LBB50_12
+; RV32ZVE32F-NEXT: .LBB50_6:
; RV32ZVE32F-NEXT: lw t5, 40(a2)
; RV32ZVE32F-NEXT: lw t6, 44(a2)
-; RV32ZVE32F-NEXT: j .LBB50_18
-; RV32ZVE32F-NEXT: .LBB50_12:
+; RV32ZVE32F-NEXT: j .LBB50_13
+; RV32ZVE32F-NEXT: .LBB50_7:
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
-; RV32ZVE32F-NEXT: j .LBB50_2
-; RV32ZVE32F-NEXT: .LBB50_13: # %cond.load1
+; RV32ZVE32F-NEXT: andi a4, t0, 2
+; RV32ZVE32F-NEXT: beqz a4, .LBB50_2
+; RV32ZVE32F-NEXT: .LBB50_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: j .LBB50_4
-; RV32ZVE32F-NEXT: .LBB50_14: # %cond.load4
+; RV32ZVE32F-NEXT: andi a6, t0, 4
+; RV32ZVE32F-NEXT: beqz a6, .LBB50_3
+; RV32ZVE32F-NEXT: .LBB50_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: j .LBB50_6
-; RV32ZVE32F-NEXT: .LBB50_15: # %cond.load7
+; RV32ZVE32F-NEXT: andi t1, t0, 8
+; RV32ZVE32F-NEXT: beqz t1, .LBB50_4
+; RV32ZVE32F-NEXT: .LBB50_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: j .LBB50_8
-; RV32ZVE32F-NEXT: .LBB50_16: # %cond.load10
+; RV32ZVE32F-NEXT: andi t3, t0, 16
+; RV32ZVE32F-NEXT: beqz t3, .LBB50_5
+; RV32ZVE32F-NEXT: .LBB50_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: j .LBB50_10
-; RV32ZVE32F-NEXT: .LBB50_17: # %cond.load13
+; RV32ZVE32F-NEXT: andi t5, t0, 32
+; RV32ZVE32F-NEXT: beqz t5, .LBB50_6
+; RV32ZVE32F-NEXT: .LBB50_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB50_18: # %else14
+; RV32ZVE32F-NEXT: .LBB50_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -4629,31 +4665,31 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB50_22
-; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB50_16
+; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
-; RV32ZVE32F-NEXT: .LBB50_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB50_23
-; RV32ZVE32F-NEXT: # %bb.21:
+; RV32ZVE32F-NEXT: bnez t0, .LBB50_17
+; RV32ZVE32F-NEXT: .LBB50_15:
; RV32ZVE32F-NEXT: lw t0, 56(a2)
; RV32ZVE32F-NEXT: lw a2, 60(a2)
-; RV32ZVE32F-NEXT: j .LBB50_24
-; RV32ZVE32F-NEXT: .LBB50_22:
+; RV32ZVE32F-NEXT: j .LBB50_18
+; RV32ZVE32F-NEXT: .LBB50_16:
; RV32ZVE32F-NEXT: lw s0, 48(a2)
; RV32ZVE32F-NEXT: lw s1, 52(a2)
-; RV32ZVE32F-NEXT: j .LBB50_20
-; RV32ZVE32F-NEXT: .LBB50_23: # %cond.load19
+; RV32ZVE32F-NEXT: andi t0, t0, -128
+; RV32ZVE32F-NEXT: beqz t0, .LBB50_15
+; RV32ZVE32F-NEXT: .LBB50_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: .LBB50_24: # %else20
+; RV32ZVE32F-NEXT: .LBB50_18: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -4683,23 +4719,23 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB50_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB50_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: zext.b a3, a3
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
-; RV64ZVE32F-NEXT: .LBB50_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB50_5
-; RV64ZVE32F-NEXT: # %bb.3:
+; RV64ZVE32F-NEXT: bnez a4, .LBB50_4
+; RV64ZVE32F-NEXT: .LBB50_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: j .LBB50_6
-; RV64ZVE32F-NEXT: .LBB50_4:
+; RV64ZVE32F-NEXT: j .LBB50_5
+; RV64ZVE32F-NEXT: .LBB50_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB50_2
-; RV64ZVE32F-NEXT: .LBB50_5: # %cond.load1
+; RV64ZVE32F-NEXT: andi a4, a5, 2
+; RV64ZVE32F-NEXT: beqz a4, .LBB50_2
+; RV64ZVE32F-NEXT: .LBB50_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
@@ -4707,54 +4743,54 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: .LBB50_6: # %else2
+; RV64ZVE32F-NEXT: .LBB50_5: # %else2
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB50_10
-; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a6, .LBB50_8
+; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v9
; RV64ZVE32F-NEXT: zext.b a6, a6
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
-; RV64ZVE32F-NEXT: .LBB50_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB50_11
-; RV64ZVE32F-NEXT: # %bb.9:
+; RV64ZVE32F-NEXT: bnez a7, .LBB50_9
+; RV64ZVE32F-NEXT: .LBB50_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
-; RV64ZVE32F-NEXT: j .LBB50_12
-; RV64ZVE32F-NEXT: .LBB50_10:
+; RV64ZVE32F-NEXT: j .LBB50_10
+; RV64ZVE32F-NEXT: .LBB50_8:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: j .LBB50_8
-; RV64ZVE32F-NEXT: .LBB50_11: # %cond.load7
+; RV64ZVE32F-NEXT: andi a7, a5, 8
+; RV64ZVE32F-NEXT: beqz a7, .LBB50_7
+; RV64ZVE32F-NEXT: .LBB50_9: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v9
; RV64ZVE32F-NEXT: zext.b a7, a7
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: .LBB50_12: # %else8
+; RV64ZVE32F-NEXT: .LBB50_10: # %else8
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
-; RV64ZVE32F-NEXT: beqz t0, .LBB50_16
-; RV64ZVE32F-NEXT: # %bb.13: # %cond.load10
+; RV64ZVE32F-NEXT: beqz t0, .LBB50_13
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: zext.b t0, t0
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: .LBB50_14: # %cond.load10
; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB50_17
-; RV64ZVE32F-NEXT: # %bb.15:
+; RV64ZVE32F-NEXT: bnez t1, .LBB50_14
+; RV64ZVE32F-NEXT: .LBB50_12:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB50_18
-; RV64ZVE32F-NEXT: .LBB50_16:
+; RV64ZVE32F-NEXT: j .LBB50_15
+; RV64ZVE32F-NEXT: .LBB50_13:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: j .LBB50_14
-; RV64ZVE32F-NEXT: .LBB50_17: # %cond.load13
+; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: beqz t1, .LBB50_12
+; RV64ZVE32F-NEXT: .LBB50_14: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v9
@@ -4762,34 +4798,34 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: .LBB50_18: # %else14
+; RV64ZVE32F-NEXT: .LBB50_15: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz t2, .LBB50_22
-; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV64ZVE32F-NEXT: beqz t2, .LBB50_18
+; RV64ZVE32F-NEXT: # %bb.16: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: zext.b t2, t2
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
-; RV64ZVE32F-NEXT: .LBB50_20: # %cond.load16
; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB50_23
-; RV64ZVE32F-NEXT: # %bb.21:
+; RV64ZVE32F-NEXT: bnez a5, .LBB50_19
+; RV64ZVE32F-NEXT: .LBB50_17:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB50_24
-; RV64ZVE32F-NEXT: .LBB50_22:
-; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: j .LBB50_20
-; RV64ZVE32F-NEXT: .LBB50_23: # %cond.load19
+; RV64ZVE32F-NEXT: .LBB50_18:
+; RV64ZVE32F-NEXT: ld t2, 48(a2)
+; RV64ZVE32F-NEXT: andi a5, a5, -128
+; RV64ZVE32F-NEXT: beqz a5, .LBB50_17
+; RV64ZVE32F-NEXT: .LBB50_19: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: zext.b a2, a2
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB50_24: # %else20
+; RV64ZVE32F-NEXT: .LBB50_20: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -4835,82 +4871,82 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i
; RV32ZVE32F-NEXT: andi a3, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccus.vx v10, a1, v8
-; RV32ZVE32F-NEXT: beqz a3, .LBB51_12
+; RV32ZVE32F-NEXT: beqz a3, .LBB51_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v10
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
-; RV32ZVE32F-NEXT: .LBB51_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB51_13
-; RV32ZVE32F-NEXT: # %bb.3:
+; RV32ZVE32F-NEXT: bnez a4, .LBB51_8
+; RV32ZVE32F-NEXT: .LBB51_2:
; RV32ZVE32F-NEXT: lw a4, 8(a2)
; RV32ZVE32F-NEXT: lw a5, 12(a2)
-; RV32ZVE32F-NEXT: .LBB51_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB51_14
-; RV32ZVE32F-NEXT: # %bb.5:
+; RV32ZVE32F-NEXT: bnez a6, .LBB51_9
+; RV32ZVE32F-NEXT: .LBB51_3:
; RV32ZVE32F-NEXT: lw a6, 16(a2)
; RV32ZVE32F-NEXT: lw a7, 20(a2)
-; RV32ZVE32F-NEXT: .LBB51_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB51_15
-; RV32ZVE32F-NEXT: # %bb.7:
+; RV32ZVE32F-NEXT: bnez t1, .LBB51_10
+; RV32ZVE32F-NEXT: .LBB51_4:
; RV32ZVE32F-NEXT: lw t1, 24(a2)
; RV32ZVE32F-NEXT: lw t2, 28(a2)
-; RV32ZVE32F-NEXT: .LBB51_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB51_16
-; RV32ZVE32F-NEXT: # %bb.9:
+; RV32ZVE32F-NEXT: bnez t3, .LBB51_11
+; RV32ZVE32F-NEXT: .LBB51_5:
; RV32ZVE32F-NEXT: lw t3, 32(a2)
; RV32ZVE32F-NEXT: lw t4, 36(a2)
-; RV32ZVE32F-NEXT: .LBB51_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB51_17
-; RV32ZVE32F-NEXT: # %bb.11:
+; RV32ZVE32F-NEXT: bnez t5, .LBB51_12
+; RV32ZVE32F-NEXT: .LBB51_6:
; RV32ZVE32F-NEXT: lw t5, 40(a2)
; RV32ZVE32F-NEXT: lw t6, 44(a2)
-; RV32ZVE32F-NEXT: j .LBB51_18
-; RV32ZVE32F-NEXT: .LBB51_12:
+; RV32ZVE32F-NEXT: j .LBB51_13
+; RV32ZVE32F-NEXT: .LBB51_7:
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
-; RV32ZVE32F-NEXT: j .LBB51_2
-; RV32ZVE32F-NEXT: .LBB51_13: # %cond.load1
+; RV32ZVE32F-NEXT: andi a4, t0, 2
+; RV32ZVE32F-NEXT: beqz a4, .LBB51_2
+; RV32ZVE32F-NEXT: .LBB51_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v8
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: j .LBB51_4
-; RV32ZVE32F-NEXT: .LBB51_14: # %cond.load4
+; RV32ZVE32F-NEXT: andi a6, t0, 4
+; RV32ZVE32F-NEXT: beqz a6, .LBB51_3
+; RV32ZVE32F-NEXT: .LBB51_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v8
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: j .LBB51_6
-; RV32ZVE32F-NEXT: .LBB51_15: # %cond.load7
+; RV32ZVE32F-NEXT: andi t1, t0, 8
+; RV32ZVE32F-NEXT: beqz t1, .LBB51_4
+; RV32ZVE32F-NEXT: .LBB51_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v8
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: j .LBB51_8
-; RV32ZVE32F-NEXT: .LBB51_16: # %cond.load10
+; RV32ZVE32F-NEXT: andi t3, t0, 16
+; RV32ZVE32F-NEXT: beqz t3, .LBB51_5
+; RV32ZVE32F-NEXT: .LBB51_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v8
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: j .LBB51_10
-; RV32ZVE32F-NEXT: .LBB51_17: # %cond.load13
+; RV32ZVE32F-NEXT: andi t5, t0, 32
+; RV32ZVE32F-NEXT: beqz t5, .LBB51_6
+; RV32ZVE32F-NEXT: .LBB51_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v8
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB51_18: # %else14
+; RV32ZVE32F-NEXT: .LBB51_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -4918,31 +4954,31 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB51_22
-; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB51_16
+; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v8
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
-; RV32ZVE32F-NEXT: .LBB51_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB51_23
-; RV32ZVE32F-NEXT: # %bb.21:
+; RV32ZVE32F-NEXT: bnez t0, .LBB51_17
+; RV32ZVE32F-NEXT: .LBB51_15:
; RV32ZVE32F-NEXT: lw t0, 56(a2)
; RV32ZVE32F-NEXT: lw a2, 60(a2)
-; RV32ZVE32F-NEXT: j .LBB51_24
-; RV32ZVE32F-NEXT: .LBB51_22:
+; RV32ZVE32F-NEXT: j .LBB51_18
+; RV32ZVE32F-NEXT: .LBB51_16:
; RV32ZVE32F-NEXT: lw s0, 48(a2)
; RV32ZVE32F-NEXT: lw s1, 52(a2)
-; RV32ZVE32F-NEXT: j .LBB51_20
-; RV32ZVE32F-NEXT: .LBB51_23: # %cond.load19
+; RV32ZVE32F-NEXT: andi t0, t0, -128
+; RV32ZVE32F-NEXT: beqz t0, .LBB51_15
+; RV32ZVE32F-NEXT: .LBB51_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: .LBB51_24: # %else20
+; RV32ZVE32F-NEXT: .LBB51_18: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -4972,106 +5008,106 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB51_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB51_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
-; RV64ZVE32F-NEXT: .LBB51_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB51_5
-; RV64ZVE32F-NEXT: # %bb.3:
+; RV64ZVE32F-NEXT: bnez a4, .LBB51_4
+; RV64ZVE32F-NEXT: .LBB51_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: j .LBB51_6
-; RV64ZVE32F-NEXT: .LBB51_4:
+; RV64ZVE32F-NEXT: j .LBB51_5
+; RV64ZVE32F-NEXT: .LBB51_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB51_2
-; RV64ZVE32F-NEXT: .LBB51_5: # %cond.load1
+; RV64ZVE32F-NEXT: andi a4, a5, 2
+; RV64ZVE32F-NEXT: beqz a4, .LBB51_2
+; RV64ZVE32F-NEXT: .LBB51_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: .LBB51_6: # %else2
+; RV64ZVE32F-NEXT: .LBB51_5: # %else2
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB51_10
-; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a6, .LBB51_8
+; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v9
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
-; RV64ZVE32F-NEXT: .LBB51_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB51_11
-; RV64ZVE32F-NEXT: # %bb.9:
+; RV64ZVE32F-NEXT: bnez a7, .LBB51_9
+; RV64ZVE32F-NEXT: .LBB51_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
-; RV64ZVE32F-NEXT: j .LBB51_12
-; RV64ZVE32F-NEXT: .LBB51_10:
+; RV64ZVE32F-NEXT: j .LBB51_10
+; RV64ZVE32F-NEXT: .LBB51_8:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: j .LBB51_8
-; RV64ZVE32F-NEXT: .LBB51_11: # %cond.load7
+; RV64ZVE32F-NEXT: andi a7, a5, 8
+; RV64ZVE32F-NEXT: beqz a7, .LBB51_7
+; RV64ZVE32F-NEXT: .LBB51_9: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v9
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: .LBB51_12: # %else8
+; RV64ZVE32F-NEXT: .LBB51_10: # %else8
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
-; RV64ZVE32F-NEXT: beqz t0, .LBB51_16
-; RV64ZVE32F-NEXT: # %bb.13: # %cond.load10
+; RV64ZVE32F-NEXT: beqz t0, .LBB51_13
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: .LBB51_14: # %cond.load10
; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB51_17
-; RV64ZVE32F-NEXT: # %bb.15:
+; RV64ZVE32F-NEXT: bnez t1, .LBB51_14
+; RV64ZVE32F-NEXT: .LBB51_12:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB51_18
-; RV64ZVE32F-NEXT: .LBB51_16:
+; RV64ZVE32F-NEXT: j .LBB51_15
+; RV64ZVE32F-NEXT: .LBB51_13:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: j .LBB51_14
-; RV64ZVE32F-NEXT: .LBB51_17: # %cond.load13
+; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: beqz t1, .LBB51_12
+; RV64ZVE32F-NEXT: .LBB51_14: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: .LBB51_18: # %else14
+; RV64ZVE32F-NEXT: .LBB51_15: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz t2, .LBB51_22
-; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV64ZVE32F-NEXT: beqz t2, .LBB51_18
+; RV64ZVE32F-NEXT: # %bb.16: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
-; RV64ZVE32F-NEXT: .LBB51_20: # %cond.load16
; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB51_23
-; RV64ZVE32F-NEXT: # %bb.21:
+; RV64ZVE32F-NEXT: bnez a5, .LBB51_19
+; RV64ZVE32F-NEXT: .LBB51_17:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB51_24
-; RV64ZVE32F-NEXT: .LBB51_22:
-; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: j .LBB51_20
-; RV64ZVE32F-NEXT: .LBB51_23: # %cond.load19
+; RV64ZVE32F-NEXT: .LBB51_18:
+; RV64ZVE32F-NEXT: ld t2, 48(a2)
+; RV64ZVE32F-NEXT: andi a5, a5, -128
+; RV64ZVE32F-NEXT: beqz a5, .LBB51_17
+; RV64ZVE32F-NEXT: .LBB51_19: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB51_24: # %else20
+; RV64ZVE32F-NEXT: .LBB51_20: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -5116,82 +5152,82 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV32ZVE32F-NEXT: andi a3, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccus.vx v10, a1, v8
-; RV32ZVE32F-NEXT: beqz a3, .LBB52_12
+; RV32ZVE32F-NEXT: beqz a3, .LBB52_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v10
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
-; RV32ZVE32F-NEXT: .LBB52_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB52_13
-; RV32ZVE32F-NEXT: # %bb.3:
+; RV32ZVE32F-NEXT: bnez a4, .LBB52_8
+; RV32ZVE32F-NEXT: .LBB52_2:
; RV32ZVE32F-NEXT: lw a4, 8(a2)
; RV32ZVE32F-NEXT: lw a5, 12(a2)
-; RV32ZVE32F-NEXT: .LBB52_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB52_14
-; RV32ZVE32F-NEXT: # %bb.5:
+; RV32ZVE32F-NEXT: bnez a6, .LBB52_9
+; RV32ZVE32F-NEXT: .LBB52_3:
; RV32ZVE32F-NEXT: lw a6, 16(a2)
; RV32ZVE32F-NEXT: lw a7, 20(a2)
-; RV32ZVE32F-NEXT: .LBB52_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB52_15
-; RV32ZVE32F-NEXT: # %bb.7:
+; RV32ZVE32F-NEXT: bnez t1, .LBB52_10
+; RV32ZVE32F-NEXT: .LBB52_4:
; RV32ZVE32F-NEXT: lw t1, 24(a2)
; RV32ZVE32F-NEXT: lw t2, 28(a2)
-; RV32ZVE32F-NEXT: .LBB52_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB52_16
-; RV32ZVE32F-NEXT: # %bb.9:
+; RV32ZVE32F-NEXT: bnez t3, .LBB52_11
+; RV32ZVE32F-NEXT: .LBB52_5:
; RV32ZVE32F-NEXT: lw t3, 32(a2)
; RV32ZVE32F-NEXT: lw t4, 36(a2)
-; RV32ZVE32F-NEXT: .LBB52_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB52_17
-; RV32ZVE32F-NEXT: # %bb.11:
+; RV32ZVE32F-NEXT: bnez t5, .LBB52_12
+; RV32ZVE32F-NEXT: .LBB52_6:
; RV32ZVE32F-NEXT: lw t5, 40(a2)
; RV32ZVE32F-NEXT: lw t6, 44(a2)
-; RV32ZVE32F-NEXT: j .LBB52_18
-; RV32ZVE32F-NEXT: .LBB52_12:
+; RV32ZVE32F-NEXT: j .LBB52_13
+; RV32ZVE32F-NEXT: .LBB52_7:
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
-; RV32ZVE32F-NEXT: j .LBB52_2
-; RV32ZVE32F-NEXT: .LBB52_13: # %cond.load1
+; RV32ZVE32F-NEXT: andi a4, t0, 2
+; RV32ZVE32F-NEXT: beqz a4, .LBB52_2
+; RV32ZVE32F-NEXT: .LBB52_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v8
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: j .LBB52_4
-; RV32ZVE32F-NEXT: .LBB52_14: # %cond.load4
+; RV32ZVE32F-NEXT: andi a6, t0, 4
+; RV32ZVE32F-NEXT: beqz a6, .LBB52_3
+; RV32ZVE32F-NEXT: .LBB52_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v8
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: j .LBB52_6
-; RV32ZVE32F-NEXT: .LBB52_15: # %cond.load7
+; RV32ZVE32F-NEXT: andi t1, t0, 8
+; RV32ZVE32F-NEXT: beqz t1, .LBB52_4
+; RV32ZVE32F-NEXT: .LBB52_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v8
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: j .LBB52_8
-; RV32ZVE32F-NEXT: .LBB52_16: # %cond.load10
+; RV32ZVE32F-NEXT: andi t3, t0, 16
+; RV32ZVE32F-NEXT: beqz t3, .LBB52_5
+; RV32ZVE32F-NEXT: .LBB52_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v8
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: j .LBB52_10
-; RV32ZVE32F-NEXT: .LBB52_17: # %cond.load13
+; RV32ZVE32F-NEXT: andi t5, t0, 32
+; RV32ZVE32F-NEXT: beqz t5, .LBB52_6
+; RV32ZVE32F-NEXT: .LBB52_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v8
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB52_18: # %else14
+; RV32ZVE32F-NEXT: .LBB52_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -5199,31 +5235,31 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB52_22
-; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB52_16
+; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v8
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
-; RV32ZVE32F-NEXT: .LBB52_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB52_23
-; RV32ZVE32F-NEXT: # %bb.21:
+; RV32ZVE32F-NEXT: bnez t0, .LBB52_17
+; RV32ZVE32F-NEXT: .LBB52_15:
; RV32ZVE32F-NEXT: lw t0, 56(a2)
; RV32ZVE32F-NEXT: lw a2, 60(a2)
-; RV32ZVE32F-NEXT: j .LBB52_24
-; RV32ZVE32F-NEXT: .LBB52_22:
+; RV32ZVE32F-NEXT: j .LBB52_18
+; RV32ZVE32F-NEXT: .LBB52_16:
; RV32ZVE32F-NEXT: lw s0, 48(a2)
; RV32ZVE32F-NEXT: lw s1, 52(a2)
-; RV32ZVE32F-NEXT: j .LBB52_20
-; RV32ZVE32F-NEXT: .LBB52_23: # %cond.load19
+; RV32ZVE32F-NEXT: andi t0, t0, -128
+; RV32ZVE32F-NEXT: beqz t0, .LBB52_15
+; RV32ZVE32F-NEXT: .LBB52_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: .LBB52_24: # %else20
+; RV32ZVE32F-NEXT: .LBB52_18: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -5253,106 +5289,106 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB52_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB52_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
-; RV64ZVE32F-NEXT: .LBB52_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB52_5
-; RV64ZVE32F-NEXT: # %bb.3:
+; RV64ZVE32F-NEXT: bnez a4, .LBB52_4
+; RV64ZVE32F-NEXT: .LBB52_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: j .LBB52_6
-; RV64ZVE32F-NEXT: .LBB52_4:
+; RV64ZVE32F-NEXT: j .LBB52_5
+; RV64ZVE32F-NEXT: .LBB52_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB52_2
-; RV64ZVE32F-NEXT: .LBB52_5: # %cond.load1
+; RV64ZVE32F-NEXT: andi a4, a5, 2
+; RV64ZVE32F-NEXT: beqz a4, .LBB52_2
+; RV64ZVE32F-NEXT: .LBB52_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: .LBB52_6: # %else2
+; RV64ZVE32F-NEXT: .LBB52_5: # %else2
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB52_10
-; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a6, .LBB52_8
+; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v9
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
-; RV64ZVE32F-NEXT: .LBB52_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB52_11
-; RV64ZVE32F-NEXT: # %bb.9:
+; RV64ZVE32F-NEXT: bnez a7, .LBB52_9
+; RV64ZVE32F-NEXT: .LBB52_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
-; RV64ZVE32F-NEXT: j .LBB52_12
-; RV64ZVE32F-NEXT: .LBB52_10:
+; RV64ZVE32F-NEXT: j .LBB52_10
+; RV64ZVE32F-NEXT: .LBB52_8:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: j .LBB52_8
-; RV64ZVE32F-NEXT: .LBB52_11: # %cond.load7
+; RV64ZVE32F-NEXT: andi a7, a5, 8
+; RV64ZVE32F-NEXT: beqz a7, .LBB52_7
+; RV64ZVE32F-NEXT: .LBB52_9: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v9
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: .LBB52_12: # %else8
+; RV64ZVE32F-NEXT: .LBB52_10: # %else8
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
-; RV64ZVE32F-NEXT: beqz t0, .LBB52_16
-; RV64ZVE32F-NEXT: # %bb.13: # %cond.load10
+; RV64ZVE32F-NEXT: beqz t0, .LBB52_13
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: .LBB52_14: # %cond.load10
; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB52_17
-; RV64ZVE32F-NEXT: # %bb.15:
+; RV64ZVE32F-NEXT: bnez t1, .LBB52_14
+; RV64ZVE32F-NEXT: .LBB52_12:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB52_18
-; RV64ZVE32F-NEXT: .LBB52_16:
+; RV64ZVE32F-NEXT: j .LBB52_15
+; RV64ZVE32F-NEXT: .LBB52_13:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: j .LBB52_14
-; RV64ZVE32F-NEXT: .LBB52_17: # %cond.load13
+; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: beqz t1, .LBB52_12
+; RV64ZVE32F-NEXT: .LBB52_14: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: .LBB52_18: # %else14
+; RV64ZVE32F-NEXT: .LBB52_15: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz t2, .LBB52_22
-; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV64ZVE32F-NEXT: beqz t2, .LBB52_18
+; RV64ZVE32F-NEXT: # %bb.16: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
-; RV64ZVE32F-NEXT: .LBB52_20: # %cond.load16
; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB52_23
-; RV64ZVE32F-NEXT: # %bb.21:
+; RV64ZVE32F-NEXT: bnez a5, .LBB52_19
+; RV64ZVE32F-NEXT: .LBB52_17:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB52_24
-; RV64ZVE32F-NEXT: .LBB52_22:
-; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: j .LBB52_20
-; RV64ZVE32F-NEXT: .LBB52_23: # %cond.load19
+; RV64ZVE32F-NEXT: .LBB52_18:
+; RV64ZVE32F-NEXT: ld t2, 48(a2)
+; RV64ZVE32F-NEXT: andi a5, a5, -128
+; RV64ZVE32F-NEXT: beqz a5, .LBB52_17
+; RV64ZVE32F-NEXT: .LBB52_19: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB52_24: # %else20
+; RV64ZVE32F-NEXT: .LBB52_20: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -5399,82 +5435,82 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV32ZVE32F-NEXT: andi a3, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccu.vx v10, a1, v8
-; RV32ZVE32F-NEXT: beqz a3, .LBB53_12
+; RV32ZVE32F-NEXT: beqz a3, .LBB53_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v10
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
-; RV32ZVE32F-NEXT: .LBB53_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB53_13
-; RV32ZVE32F-NEXT: # %bb.3:
+; RV32ZVE32F-NEXT: bnez a4, .LBB53_8
+; RV32ZVE32F-NEXT: .LBB53_2:
; RV32ZVE32F-NEXT: lw a4, 8(a2)
; RV32ZVE32F-NEXT: lw a5, 12(a2)
-; RV32ZVE32F-NEXT: .LBB53_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB53_14
-; RV32ZVE32F-NEXT: # %bb.5:
+; RV32ZVE32F-NEXT: bnez a6, .LBB53_9
+; RV32ZVE32F-NEXT: .LBB53_3:
; RV32ZVE32F-NEXT: lw a6, 16(a2)
; RV32ZVE32F-NEXT: lw a7, 20(a2)
-; RV32ZVE32F-NEXT: .LBB53_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB53_15
-; RV32ZVE32F-NEXT: # %bb.7:
+; RV32ZVE32F-NEXT: bnez t1, .LBB53_10
+; RV32ZVE32F-NEXT: .LBB53_4:
; RV32ZVE32F-NEXT: lw t1, 24(a2)
; RV32ZVE32F-NEXT: lw t2, 28(a2)
-; RV32ZVE32F-NEXT: .LBB53_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB53_16
-; RV32ZVE32F-NEXT: # %bb.9:
+; RV32ZVE32F-NEXT: bnez t3, .LBB53_11
+; RV32ZVE32F-NEXT: .LBB53_5:
; RV32ZVE32F-NEXT: lw t3, 32(a2)
; RV32ZVE32F-NEXT: lw t4, 36(a2)
-; RV32ZVE32F-NEXT: .LBB53_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB53_17
-; RV32ZVE32F-NEXT: # %bb.11:
+; RV32ZVE32F-NEXT: bnez t5, .LBB53_12
+; RV32ZVE32F-NEXT: .LBB53_6:
; RV32ZVE32F-NEXT: lw t5, 40(a2)
; RV32ZVE32F-NEXT: lw t6, 44(a2)
-; RV32ZVE32F-NEXT: j .LBB53_18
-; RV32ZVE32F-NEXT: .LBB53_12:
+; RV32ZVE32F-NEXT: j .LBB53_13
+; RV32ZVE32F-NEXT: .LBB53_7:
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
-; RV32ZVE32F-NEXT: j .LBB53_2
-; RV32ZVE32F-NEXT: .LBB53_13: # %cond.load1
+; RV32ZVE32F-NEXT: andi a4, t0, 2
+; RV32ZVE32F-NEXT: beqz a4, .LBB53_2
+; RV32ZVE32F-NEXT: .LBB53_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v8
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: j .LBB53_4
-; RV32ZVE32F-NEXT: .LBB53_14: # %cond.load4
+; RV32ZVE32F-NEXT: andi a6, t0, 4
+; RV32ZVE32F-NEXT: beqz a6, .LBB53_3
+; RV32ZVE32F-NEXT: .LBB53_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v8
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: j .LBB53_6
-; RV32ZVE32F-NEXT: .LBB53_15: # %cond.load7
+; RV32ZVE32F-NEXT: andi t1, t0, 8
+; RV32ZVE32F-NEXT: beqz t1, .LBB53_4
+; RV32ZVE32F-NEXT: .LBB53_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v8
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: j .LBB53_8
-; RV32ZVE32F-NEXT: .LBB53_16: # %cond.load10
+; RV32ZVE32F-NEXT: andi t3, t0, 16
+; RV32ZVE32F-NEXT: beqz t3, .LBB53_5
+; RV32ZVE32F-NEXT: .LBB53_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v8
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: j .LBB53_10
-; RV32ZVE32F-NEXT: .LBB53_17: # %cond.load13
+; RV32ZVE32F-NEXT: andi t5, t0, 32
+; RV32ZVE32F-NEXT: beqz t5, .LBB53_6
+; RV32ZVE32F-NEXT: .LBB53_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v8
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB53_18: # %else14
+; RV32ZVE32F-NEXT: .LBB53_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -5482,31 +5518,31 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB53_22
-; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB53_16
+; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v8
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
-; RV32ZVE32F-NEXT: .LBB53_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB53_23
-; RV32ZVE32F-NEXT: # %bb.21:
+; RV32ZVE32F-NEXT: bnez t0, .LBB53_17
+; RV32ZVE32F-NEXT: .LBB53_15:
; RV32ZVE32F-NEXT: lw t0, 56(a2)
; RV32ZVE32F-NEXT: lw a2, 60(a2)
-; RV32ZVE32F-NEXT: j .LBB53_24
-; RV32ZVE32F-NEXT: .LBB53_22:
+; RV32ZVE32F-NEXT: j .LBB53_18
+; RV32ZVE32F-NEXT: .LBB53_16:
; RV32ZVE32F-NEXT: lw s0, 48(a2)
; RV32ZVE32F-NEXT: lw s1, 52(a2)
-; RV32ZVE32F-NEXT: j .LBB53_20
-; RV32ZVE32F-NEXT: .LBB53_23: # %cond.load19
+; RV32ZVE32F-NEXT: andi t0, t0, -128
+; RV32ZVE32F-NEXT: beqz t0, .LBB53_15
+; RV32ZVE32F-NEXT: .LBB53_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: .LBB53_24: # %else20
+; RV32ZVE32F-NEXT: .LBB53_18: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -5536,7 +5572,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB53_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB53_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
@@ -5544,16 +5580,16 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: srli a3, a3, 45
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
-; RV64ZVE32F-NEXT: .LBB53_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB53_5
-; RV64ZVE32F-NEXT: # %bb.3:
+; RV64ZVE32F-NEXT: bnez a4, .LBB53_4
+; RV64ZVE32F-NEXT: .LBB53_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: j .LBB53_6
-; RV64ZVE32F-NEXT: .LBB53_4:
+; RV64ZVE32F-NEXT: j .LBB53_5
+; RV64ZVE32F-NEXT: .LBB53_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB53_2
-; RV64ZVE32F-NEXT: .LBB53_5: # %cond.load1
+; RV64ZVE32F-NEXT: andi a4, a5, 2
+; RV64ZVE32F-NEXT: beqz a4, .LBB53_2
+; RV64ZVE32F-NEXT: .LBB53_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
@@ -5561,54 +5597,54 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: srli a4, a4, 45
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: .LBB53_6: # %else2
+; RV64ZVE32F-NEXT: .LBB53_5: # %else2
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB53_10
-; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a6, .LBB53_8
+; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v9
; RV64ZVE32F-NEXT: slli a6, a6, 48
; RV64ZVE32F-NEXT: srli a6, a6, 45
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
-; RV64ZVE32F-NEXT: .LBB53_8: # %cond.load4
; RV64ZVE32F-NEXT: andi a7, a5, 8
-; RV64ZVE32F-NEXT: bnez a7, .LBB53_11
-; RV64ZVE32F-NEXT: # %bb.9:
+; RV64ZVE32F-NEXT: bnez a7, .LBB53_9
+; RV64ZVE32F-NEXT: .LBB53_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
-; RV64ZVE32F-NEXT: j .LBB53_12
-; RV64ZVE32F-NEXT: .LBB53_10:
+; RV64ZVE32F-NEXT: j .LBB53_10
+; RV64ZVE32F-NEXT: .LBB53_8:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: j .LBB53_8
-; RV64ZVE32F-NEXT: .LBB53_11: # %cond.load7
+; RV64ZVE32F-NEXT: andi a7, a5, 8
+; RV64ZVE32F-NEXT: beqz a7, .LBB53_7
+; RV64ZVE32F-NEXT: .LBB53_9: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v9
; RV64ZVE32F-NEXT: slli a7, a7, 48
; RV64ZVE32F-NEXT: srli a7, a7, 45
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: .LBB53_12: # %else8
+; RV64ZVE32F-NEXT: .LBB53_10: # %else8
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
-; RV64ZVE32F-NEXT: beqz t0, .LBB53_16
-; RV64ZVE32F-NEXT: # %bb.13: # %cond.load10
+; RV64ZVE32F-NEXT: beqz t0, .LBB53_13
+; RV64ZVE32F-NEXT: # %bb.11: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 48
; RV64ZVE32F-NEXT: srli t0, t0, 45
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: .LBB53_14: # %cond.load10
; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB53_17
-; RV64ZVE32F-NEXT: # %bb.15:
+; RV64ZVE32F-NEXT: bnez t1, .LBB53_14
+; RV64ZVE32F-NEXT: .LBB53_12:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB53_18
-; RV64ZVE32F-NEXT: .LBB53_16:
+; RV64ZVE32F-NEXT: j .LBB53_15
+; RV64ZVE32F-NEXT: .LBB53_13:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: j .LBB53_14
-; RV64ZVE32F-NEXT: .LBB53_17: # %cond.load13
+; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: beqz t1, .LBB53_12
+; RV64ZVE32F-NEXT: .LBB53_14: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v9
@@ -5616,34 +5652,34 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: srli t1, t1, 45
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: .LBB53_18: # %else14
+; RV64ZVE32F-NEXT: .LBB53_15: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz t2, .LBB53_22
-; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV64ZVE32F-NEXT: beqz t2, .LBB53_18
+; RV64ZVE32F-NEXT: # %bb.16: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 48
; RV64ZVE32F-NEXT: srli t2, t2, 45
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
-; RV64ZVE32F-NEXT: .LBB53_20: # %cond.load16
; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB53_23
-; RV64ZVE32F-NEXT: # %bb.21:
+; RV64ZVE32F-NEXT: bnez a5, .LBB53_19
+; RV64ZVE32F-NEXT: .LBB53_17:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB53_24
-; RV64ZVE32F-NEXT: .LBB53_22:
-; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: j .LBB53_20
-; RV64ZVE32F-NEXT: .LBB53_23: # %cond.load19
+; RV64ZVE32F-NEXT: .LBB53_18:
+; RV64ZVE32F-NEXT: ld t2, 48(a2)
+; RV64ZVE32F-NEXT: andi a5, a5, -128
+; RV64ZVE32F-NEXT: beqz a5, .LBB53_17
+; RV64ZVE32F-NEXT: .LBB53_19: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 45
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB53_24: # %else20
+; RV64ZVE32F-NEXT: .LBB53_20: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -5688,81 +5724,81 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i
; RV32ZVE32F-NEXT: andi a3, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: beqz a3, .LBB54_12
+; RV32ZVE32F-NEXT: beqz a3, .LBB54_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
-; RV32ZVE32F-NEXT: .LBB54_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB54_13
-; RV32ZVE32F-NEXT: # %bb.3:
+; RV32ZVE32F-NEXT: bnez a4, .LBB54_8
+; RV32ZVE32F-NEXT: .LBB54_2:
; RV32ZVE32F-NEXT: lw a4, 8(a2)
; RV32ZVE32F-NEXT: lw a5, 12(a2)
-; RV32ZVE32F-NEXT: .LBB54_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB54_14
-; RV32ZVE32F-NEXT: # %bb.5:
+; RV32ZVE32F-NEXT: bnez a6, .LBB54_9
+; RV32ZVE32F-NEXT: .LBB54_3:
; RV32ZVE32F-NEXT: lw a6, 16(a2)
; RV32ZVE32F-NEXT: lw a7, 20(a2)
-; RV32ZVE32F-NEXT: .LBB54_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB54_15
-; RV32ZVE32F-NEXT: # %bb.7:
+; RV32ZVE32F-NEXT: bnez t1, .LBB54_10
+; RV32ZVE32F-NEXT: .LBB54_4:
; RV32ZVE32F-NEXT: lw t1, 24(a2)
; RV32ZVE32F-NEXT: lw t2, 28(a2)
-; RV32ZVE32F-NEXT: .LBB54_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB54_16
-; RV32ZVE32F-NEXT: # %bb.9:
+; RV32ZVE32F-NEXT: bnez t3, .LBB54_11
+; RV32ZVE32F-NEXT: .LBB54_5:
; RV32ZVE32F-NEXT: lw t3, 32(a2)
; RV32ZVE32F-NEXT: lw t4, 36(a2)
-; RV32ZVE32F-NEXT: .LBB54_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB54_17
-; RV32ZVE32F-NEXT: # %bb.11:
+; RV32ZVE32F-NEXT: bnez t5, .LBB54_12
+; RV32ZVE32F-NEXT: .LBB54_6:
; RV32ZVE32F-NEXT: lw t5, 40(a2)
; RV32ZVE32F-NEXT: lw t6, 44(a2)
-; RV32ZVE32F-NEXT: j .LBB54_18
-; RV32ZVE32F-NEXT: .LBB54_12:
+; RV32ZVE32F-NEXT: j .LBB54_13
+; RV32ZVE32F-NEXT: .LBB54_7:
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
-; RV32ZVE32F-NEXT: j .LBB54_2
-; RV32ZVE32F-NEXT: .LBB54_13: # %cond.load1
+; RV32ZVE32F-NEXT: andi a4, t0, 2
+; RV32ZVE32F-NEXT: beqz a4, .LBB54_2
+; RV32ZVE32F-NEXT: .LBB54_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: j .LBB54_4
-; RV32ZVE32F-NEXT: .LBB54_14: # %cond.load4
+; RV32ZVE32F-NEXT: andi a6, t0, 4
+; RV32ZVE32F-NEXT: beqz a6, .LBB54_3
+; RV32ZVE32F-NEXT: .LBB54_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: j .LBB54_6
-; RV32ZVE32F-NEXT: .LBB54_15: # %cond.load7
+; RV32ZVE32F-NEXT: andi t1, t0, 8
+; RV32ZVE32F-NEXT: beqz t1, .LBB54_4
+; RV32ZVE32F-NEXT: .LBB54_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: j .LBB54_8
-; RV32ZVE32F-NEXT: .LBB54_16: # %cond.load10
+; RV32ZVE32F-NEXT: andi t3, t0, 16
+; RV32ZVE32F-NEXT: beqz t3, .LBB54_5
+; RV32ZVE32F-NEXT: .LBB54_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: j .LBB54_10
-; RV32ZVE32F-NEXT: .LBB54_17: # %cond.load13
+; RV32ZVE32F-NEXT: andi t5, t0, 32
+; RV32ZVE32F-NEXT: beqz t5, .LBB54_6
+; RV32ZVE32F-NEXT: .LBB54_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB54_18: # %else14
+; RV32ZVE32F-NEXT: .LBB54_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -5770,31 +5806,31 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB54_22
-; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB54_16
+; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
-; RV32ZVE32F-NEXT: .LBB54_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB54_23
-; RV32ZVE32F-NEXT: # %bb.21:
+; RV32ZVE32F-NEXT: bnez t0, .LBB54_17
+; RV32ZVE32F-NEXT: .LBB54_15:
; RV32ZVE32F-NEXT: lw t0, 56(a2)
; RV32ZVE32F-NEXT: lw a2, 60(a2)
-; RV32ZVE32F-NEXT: j .LBB54_24
-; RV32ZVE32F-NEXT: .LBB54_22:
+; RV32ZVE32F-NEXT: j .LBB54_18
+; RV32ZVE32F-NEXT: .LBB54_16:
; RV32ZVE32F-NEXT: lw s0, 48(a2)
; RV32ZVE32F-NEXT: lw s1, 52(a2)
-; RV32ZVE32F-NEXT: j .LBB54_20
-; RV32ZVE32F-NEXT: .LBB54_23: # %cond.load19
+; RV32ZVE32F-NEXT: andi t0, t0, -128
+; RV32ZVE32F-NEXT: beqz t0, .LBB54_15
+; RV32ZVE32F-NEXT: .LBB54_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: .LBB54_24: # %else20
+; RV32ZVE32F-NEXT: .LBB54_18: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -5824,108 +5860,108 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB54_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB54_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
-; RV64ZVE32F-NEXT: .LBB54_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB54_5
-; RV64ZVE32F-NEXT: # %bb.3:
+; RV64ZVE32F-NEXT: bnez a4, .LBB54_4
+; RV64ZVE32F-NEXT: .LBB54_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: j .LBB54_6
-; RV64ZVE32F-NEXT: .LBB54_4:
+; RV64ZVE32F-NEXT: j .LBB54_5
+; RV64ZVE32F-NEXT: .LBB54_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB54_2
-; RV64ZVE32F-NEXT: .LBB54_5: # %cond.load1
+; RV64ZVE32F-NEXT: andi a4, a5, 2
+; RV64ZVE32F-NEXT: beqz a4, .LBB54_2
+; RV64ZVE32F-NEXT: .LBB54_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v10
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: .LBB54_6: # %else2
+; RV64ZVE32F-NEXT: .LBB54_5: # %else2
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB54_8
-; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a6, .LBB54_7
+; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v10
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
-; RV64ZVE32F-NEXT: j .LBB54_9
-; RV64ZVE32F-NEXT: .LBB54_8:
+; RV64ZVE32F-NEXT: j .LBB54_8
+; RV64ZVE32F-NEXT: .LBB54_7:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: .LBB54_9: # %else5
+; RV64ZVE32F-NEXT: .LBB54_8: # %else5
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
-; RV64ZVE32F-NEXT: beqz a7, .LBB54_15
-; RV64ZVE32F-NEXT: # %bb.10: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a7, .LBB54_12
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v9
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: .LBB54_11: # %cond.load7
; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB54_16
-; RV64ZVE32F-NEXT: # %bb.12:
+; RV64ZVE32F-NEXT: bnez t0, .LBB54_13
+; RV64ZVE32F-NEXT: .LBB54_10:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: .LBB54_13:
; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB54_17
-; RV64ZVE32F-NEXT: # %bb.14:
+; RV64ZVE32F-NEXT: bnez t1, .LBB54_14
+; RV64ZVE32F-NEXT: .LBB54_11:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB54_18
-; RV64ZVE32F-NEXT: .LBB54_15:
+; RV64ZVE32F-NEXT: j .LBB54_15
+; RV64ZVE32F-NEXT: .LBB54_12:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
-; RV64ZVE32F-NEXT: j .LBB54_11
-; RV64ZVE32F-NEXT: .LBB54_16: # %cond.load10
+; RV64ZVE32F-NEXT: andi t0, a5, 16
+; RV64ZVE32F-NEXT: beqz t0, .LBB54_10
+; RV64ZVE32F-NEXT: .LBB54_13: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: j .LBB54_13
-; RV64ZVE32F-NEXT: .LBB54_17: # %cond.load13
+; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: beqz t1, .LBB54_11
+; RV64ZVE32F-NEXT: .LBB54_14: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: .LBB54_18: # %else14
+; RV64ZVE32F-NEXT: .LBB54_15: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz t2, .LBB54_22
-; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV64ZVE32F-NEXT: beqz t2, .LBB54_18
+; RV64ZVE32F-NEXT: # %bb.16: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
-; RV64ZVE32F-NEXT: .LBB54_20: # %cond.load16
; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB54_23
-; RV64ZVE32F-NEXT: # %bb.21:
+; RV64ZVE32F-NEXT: bnez a5, .LBB54_19
+; RV64ZVE32F-NEXT: .LBB54_17:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB54_24
-; RV64ZVE32F-NEXT: .LBB54_22:
-; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: j .LBB54_20
-; RV64ZVE32F-NEXT: .LBB54_23: # %cond.load19
+; RV64ZVE32F-NEXT: .LBB54_18:
+; RV64ZVE32F-NEXT: ld t2, 48(a2)
+; RV64ZVE32F-NEXT: andi a5, a5, -128
+; RV64ZVE32F-NEXT: beqz a5, .LBB54_17
+; RV64ZVE32F-NEXT: .LBB54_19: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB54_24: # %else20
+; RV64ZVE32F-NEXT: .LBB54_20: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -5969,81 +6005,81 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV32ZVE32F-NEXT: andi a3, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: beqz a3, .LBB55_12
+; RV32ZVE32F-NEXT: beqz a3, .LBB55_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
-; RV32ZVE32F-NEXT: .LBB55_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB55_13
-; RV32ZVE32F-NEXT: # %bb.3:
+; RV32ZVE32F-NEXT: bnez a4, .LBB55_8
+; RV32ZVE32F-NEXT: .LBB55_2:
; RV32ZVE32F-NEXT: lw a4, 8(a2)
; RV32ZVE32F-NEXT: lw a5, 12(a2)
-; RV32ZVE32F-NEXT: .LBB55_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB55_14
-; RV32ZVE32F-NEXT: # %bb.5:
+; RV32ZVE32F-NEXT: bnez a6, .LBB55_9
+; RV32ZVE32F-NEXT: .LBB55_3:
; RV32ZVE32F-NEXT: lw a6, 16(a2)
; RV32ZVE32F-NEXT: lw a7, 20(a2)
-; RV32ZVE32F-NEXT: .LBB55_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB55_15
-; RV32ZVE32F-NEXT: # %bb.7:
+; RV32ZVE32F-NEXT: bnez t1, .LBB55_10
+; RV32ZVE32F-NEXT: .LBB55_4:
; RV32ZVE32F-NEXT: lw t1, 24(a2)
; RV32ZVE32F-NEXT: lw t2, 28(a2)
-; RV32ZVE32F-NEXT: .LBB55_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB55_16
-; RV32ZVE32F-NEXT: # %bb.9:
+; RV32ZVE32F-NEXT: bnez t3, .LBB55_11
+; RV32ZVE32F-NEXT: .LBB55_5:
; RV32ZVE32F-NEXT: lw t3, 32(a2)
; RV32ZVE32F-NEXT: lw t4, 36(a2)
-; RV32ZVE32F-NEXT: .LBB55_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB55_17
-; RV32ZVE32F-NEXT: # %bb.11:
+; RV32ZVE32F-NEXT: bnez t5, .LBB55_12
+; RV32ZVE32F-NEXT: .LBB55_6:
; RV32ZVE32F-NEXT: lw t5, 40(a2)
; RV32ZVE32F-NEXT: lw t6, 44(a2)
-; RV32ZVE32F-NEXT: j .LBB55_18
-; RV32ZVE32F-NEXT: .LBB55_12:
+; RV32ZVE32F-NEXT: j .LBB55_13
+; RV32ZVE32F-NEXT: .LBB55_7:
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
-; RV32ZVE32F-NEXT: j .LBB55_2
-; RV32ZVE32F-NEXT: .LBB55_13: # %cond.load1
+; RV32ZVE32F-NEXT: andi a4, t0, 2
+; RV32ZVE32F-NEXT: beqz a4, .LBB55_2
+; RV32ZVE32F-NEXT: .LBB55_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: j .LBB55_4
-; RV32ZVE32F-NEXT: .LBB55_14: # %cond.load4
+; RV32ZVE32F-NEXT: andi a6, t0, 4
+; RV32ZVE32F-NEXT: beqz a6, .LBB55_3
+; RV32ZVE32F-NEXT: .LBB55_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: j .LBB55_6
-; RV32ZVE32F-NEXT: .LBB55_15: # %cond.load7
+; RV32ZVE32F-NEXT: andi t1, t0, 8
+; RV32ZVE32F-NEXT: beqz t1, .LBB55_4
+; RV32ZVE32F-NEXT: .LBB55_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: j .LBB55_8
-; RV32ZVE32F-NEXT: .LBB55_16: # %cond.load10
+; RV32ZVE32F-NEXT: andi t3, t0, 16
+; RV32ZVE32F-NEXT: beqz t3, .LBB55_5
+; RV32ZVE32F-NEXT: .LBB55_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: j .LBB55_10
-; RV32ZVE32F-NEXT: .LBB55_17: # %cond.load13
+; RV32ZVE32F-NEXT: andi t5, t0, 32
+; RV32ZVE32F-NEXT: beqz t5, .LBB55_6
+; RV32ZVE32F-NEXT: .LBB55_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB55_18: # %else14
+; RV32ZVE32F-NEXT: .LBB55_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -6051,31 +6087,31 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB55_22
-; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB55_16
+; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
-; RV32ZVE32F-NEXT: .LBB55_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB55_23
-; RV32ZVE32F-NEXT: # %bb.21:
+; RV32ZVE32F-NEXT: bnez t0, .LBB55_17
+; RV32ZVE32F-NEXT: .LBB55_15:
; RV32ZVE32F-NEXT: lw t0, 56(a2)
; RV32ZVE32F-NEXT: lw a2, 60(a2)
-; RV32ZVE32F-NEXT: j .LBB55_24
-; RV32ZVE32F-NEXT: .LBB55_22:
+; RV32ZVE32F-NEXT: j .LBB55_18
+; RV32ZVE32F-NEXT: .LBB55_16:
; RV32ZVE32F-NEXT: lw s0, 48(a2)
; RV32ZVE32F-NEXT: lw s1, 52(a2)
-; RV32ZVE32F-NEXT: j .LBB55_20
-; RV32ZVE32F-NEXT: .LBB55_23: # %cond.load19
+; RV32ZVE32F-NEXT: andi t0, t0, -128
+; RV32ZVE32F-NEXT: beqz t0, .LBB55_15
+; RV32ZVE32F-NEXT: .LBB55_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: .LBB55_24: # %else20
+; RV32ZVE32F-NEXT: .LBB55_18: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -6105,108 +6141,108 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB55_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB55_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
-; RV64ZVE32F-NEXT: .LBB55_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB55_5
-; RV64ZVE32F-NEXT: # %bb.3:
+; RV64ZVE32F-NEXT: bnez a4, .LBB55_4
+; RV64ZVE32F-NEXT: .LBB55_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: j .LBB55_6
-; RV64ZVE32F-NEXT: .LBB55_4:
+; RV64ZVE32F-NEXT: j .LBB55_5
+; RV64ZVE32F-NEXT: .LBB55_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB55_2
-; RV64ZVE32F-NEXT: .LBB55_5: # %cond.load1
+; RV64ZVE32F-NEXT: andi a4, a5, 2
+; RV64ZVE32F-NEXT: beqz a4, .LBB55_2
+; RV64ZVE32F-NEXT: .LBB55_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v10
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: .LBB55_6: # %else2
+; RV64ZVE32F-NEXT: .LBB55_5: # %else2
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB55_8
-; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a6, .LBB55_7
+; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v10
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
-; RV64ZVE32F-NEXT: j .LBB55_9
-; RV64ZVE32F-NEXT: .LBB55_8:
+; RV64ZVE32F-NEXT: j .LBB55_8
+; RV64ZVE32F-NEXT: .LBB55_7:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: .LBB55_9: # %else5
+; RV64ZVE32F-NEXT: .LBB55_8: # %else5
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
-; RV64ZVE32F-NEXT: beqz a7, .LBB55_15
-; RV64ZVE32F-NEXT: # %bb.10: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a7, .LBB55_12
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v9
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: .LBB55_11: # %cond.load7
; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB55_16
-; RV64ZVE32F-NEXT: # %bb.12:
+; RV64ZVE32F-NEXT: bnez t0, .LBB55_13
+; RV64ZVE32F-NEXT: .LBB55_10:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: .LBB55_13:
; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB55_17
-; RV64ZVE32F-NEXT: # %bb.14:
+; RV64ZVE32F-NEXT: bnez t1, .LBB55_14
+; RV64ZVE32F-NEXT: .LBB55_11:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB55_18
-; RV64ZVE32F-NEXT: .LBB55_15:
+; RV64ZVE32F-NEXT: j .LBB55_15
+; RV64ZVE32F-NEXT: .LBB55_12:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
-; RV64ZVE32F-NEXT: j .LBB55_11
-; RV64ZVE32F-NEXT: .LBB55_16: # %cond.load10
+; RV64ZVE32F-NEXT: andi t0, a5, 16
+; RV64ZVE32F-NEXT: beqz t0, .LBB55_10
+; RV64ZVE32F-NEXT: .LBB55_13: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: j .LBB55_13
-; RV64ZVE32F-NEXT: .LBB55_17: # %cond.load13
+; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: beqz t1, .LBB55_11
+; RV64ZVE32F-NEXT: .LBB55_14: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: .LBB55_18: # %else14
+; RV64ZVE32F-NEXT: .LBB55_15: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz t2, .LBB55_22
-; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV64ZVE32F-NEXT: beqz t2, .LBB55_18
+; RV64ZVE32F-NEXT: # %bb.16: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
-; RV64ZVE32F-NEXT: .LBB55_20: # %cond.load16
; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB55_23
-; RV64ZVE32F-NEXT: # %bb.21:
+; RV64ZVE32F-NEXT: bnez a5, .LBB55_19
+; RV64ZVE32F-NEXT: .LBB55_17:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB55_24
-; RV64ZVE32F-NEXT: .LBB55_22:
-; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: j .LBB55_20
-; RV64ZVE32F-NEXT: .LBB55_23: # %cond.load19
+; RV64ZVE32F-NEXT: .LBB55_18:
+; RV64ZVE32F-NEXT: ld t2, 48(a2)
+; RV64ZVE32F-NEXT: andi a5, a5, -128
+; RV64ZVE32F-NEXT: beqz a5, .LBB55_17
+; RV64ZVE32F-NEXT: .LBB55_19: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB55_24: # %else20
+; RV64ZVE32F-NEXT: .LBB55_20: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -6251,81 +6287,81 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV32ZVE32F-NEXT: andi a3, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: beqz a3, .LBB56_12
+; RV32ZVE32F-NEXT: beqz a3, .LBB56_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
-; RV32ZVE32F-NEXT: .LBB56_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB56_13
-; RV32ZVE32F-NEXT: # %bb.3:
+; RV32ZVE32F-NEXT: bnez a4, .LBB56_8
+; RV32ZVE32F-NEXT: .LBB56_2:
; RV32ZVE32F-NEXT: lw a4, 8(a2)
; RV32ZVE32F-NEXT: lw a5, 12(a2)
-; RV32ZVE32F-NEXT: .LBB56_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB56_14
-; RV32ZVE32F-NEXT: # %bb.5:
+; RV32ZVE32F-NEXT: bnez a6, .LBB56_9
+; RV32ZVE32F-NEXT: .LBB56_3:
; RV32ZVE32F-NEXT: lw a6, 16(a2)
; RV32ZVE32F-NEXT: lw a7, 20(a2)
-; RV32ZVE32F-NEXT: .LBB56_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB56_15
-; RV32ZVE32F-NEXT: # %bb.7:
+; RV32ZVE32F-NEXT: bnez t1, .LBB56_10
+; RV32ZVE32F-NEXT: .LBB56_4:
; RV32ZVE32F-NEXT: lw t1, 24(a2)
; RV32ZVE32F-NEXT: lw t2, 28(a2)
-; RV32ZVE32F-NEXT: .LBB56_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB56_16
-; RV32ZVE32F-NEXT: # %bb.9:
+; RV32ZVE32F-NEXT: bnez t3, .LBB56_11
+; RV32ZVE32F-NEXT: .LBB56_5:
; RV32ZVE32F-NEXT: lw t3, 32(a2)
; RV32ZVE32F-NEXT: lw t4, 36(a2)
-; RV32ZVE32F-NEXT: .LBB56_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB56_17
-; RV32ZVE32F-NEXT: # %bb.11:
+; RV32ZVE32F-NEXT: bnez t5, .LBB56_12
+; RV32ZVE32F-NEXT: .LBB56_6:
; RV32ZVE32F-NEXT: lw t5, 40(a2)
; RV32ZVE32F-NEXT: lw t6, 44(a2)
-; RV32ZVE32F-NEXT: j .LBB56_18
-; RV32ZVE32F-NEXT: .LBB56_12:
+; RV32ZVE32F-NEXT: j .LBB56_13
+; RV32ZVE32F-NEXT: .LBB56_7:
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
-; RV32ZVE32F-NEXT: j .LBB56_2
-; RV32ZVE32F-NEXT: .LBB56_13: # %cond.load1
+; RV32ZVE32F-NEXT: andi a4, t0, 2
+; RV32ZVE32F-NEXT: beqz a4, .LBB56_2
+; RV32ZVE32F-NEXT: .LBB56_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: j .LBB56_4
-; RV32ZVE32F-NEXT: .LBB56_14: # %cond.load4
+; RV32ZVE32F-NEXT: andi a6, t0, 4
+; RV32ZVE32F-NEXT: beqz a6, .LBB56_3
+; RV32ZVE32F-NEXT: .LBB56_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: j .LBB56_6
-; RV32ZVE32F-NEXT: .LBB56_15: # %cond.load7
+; RV32ZVE32F-NEXT: andi t1, t0, 8
+; RV32ZVE32F-NEXT: beqz t1, .LBB56_4
+; RV32ZVE32F-NEXT: .LBB56_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: j .LBB56_8
-; RV32ZVE32F-NEXT: .LBB56_16: # %cond.load10
+; RV32ZVE32F-NEXT: andi t3, t0, 16
+; RV32ZVE32F-NEXT: beqz t3, .LBB56_5
+; RV32ZVE32F-NEXT: .LBB56_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: j .LBB56_10
-; RV32ZVE32F-NEXT: .LBB56_17: # %cond.load13
+; RV32ZVE32F-NEXT: andi t5, t0, 32
+; RV32ZVE32F-NEXT: beqz t5, .LBB56_6
+; RV32ZVE32F-NEXT: .LBB56_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB56_18: # %else14
+; RV32ZVE32F-NEXT: .LBB56_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -6333,31 +6369,31 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB56_22
-; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB56_16
+; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
-; RV32ZVE32F-NEXT: .LBB56_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB56_23
-; RV32ZVE32F-NEXT: # %bb.21:
+; RV32ZVE32F-NEXT: bnez t0, .LBB56_17
+; RV32ZVE32F-NEXT: .LBB56_15:
; RV32ZVE32F-NEXT: lw t0, 56(a2)
; RV32ZVE32F-NEXT: lw a2, 60(a2)
-; RV32ZVE32F-NEXT: j .LBB56_24
-; RV32ZVE32F-NEXT: .LBB56_22:
+; RV32ZVE32F-NEXT: j .LBB56_18
+; RV32ZVE32F-NEXT: .LBB56_16:
; RV32ZVE32F-NEXT: lw s0, 48(a2)
; RV32ZVE32F-NEXT: lw s1, 52(a2)
-; RV32ZVE32F-NEXT: j .LBB56_20
-; RV32ZVE32F-NEXT: .LBB56_23: # %cond.load19
+; RV32ZVE32F-NEXT: andi t0, t0, -128
+; RV32ZVE32F-NEXT: beqz t0, .LBB56_15
+; RV32ZVE32F-NEXT: .LBB56_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: .LBB56_24: # %else20
+; RV32ZVE32F-NEXT: .LBB56_18: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -6387,7 +6423,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB56_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB56_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
@@ -6395,16 +6431,16 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
-; RV64ZVE32F-NEXT: .LBB56_2: # %cond.load
; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB56_5
-; RV64ZVE32F-NEXT: # %bb.3:
+; RV64ZVE32F-NEXT: bnez a4, .LBB56_4
+; RV64ZVE32F-NEXT: .LBB56_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: j .LBB56_6
-; RV64ZVE32F-NEXT: .LBB56_4:
+; RV64ZVE32F-NEXT: j .LBB56_5
+; RV64ZVE32F-NEXT: .LBB56_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB56_2
-; RV64ZVE32F-NEXT: .LBB56_5: # %cond.load1
+; RV64ZVE32F-NEXT: andi a4, a5, 2
+; RV64ZVE32F-NEXT: beqz a4, .LBB56_2
+; RV64ZVE32F-NEXT: .LBB56_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v10
@@ -6412,26 +6448,26 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV64ZVE32F-NEXT: srli a4, a4, 29
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: .LBB56_6: # %else2
+; RV64ZVE32F-NEXT: .LBB56_5: # %else2
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
-; RV64ZVE32F-NEXT: beqz a6, .LBB56_8
-; RV64ZVE32F-NEXT: # %bb.7: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a6, .LBB56_7
+; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v10
; RV64ZVE32F-NEXT: slli a6, a6, 32
; RV64ZVE32F-NEXT: srli a6, a6, 29
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
-; RV64ZVE32F-NEXT: j .LBB56_9
-; RV64ZVE32F-NEXT: .LBB56_8:
+; RV64ZVE32F-NEXT: j .LBB56_8
+; RV64ZVE32F-NEXT: .LBB56_7:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: .LBB56_9: # %else5
+; RV64ZVE32F-NEXT: .LBB56_8: # %else5
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
-; RV64ZVE32F-NEXT: beqz a7, .LBB56_15
-; RV64ZVE32F-NEXT: # %bb.10: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a7, .LBB56_12
+; RV64ZVE32F-NEXT: # %bb.9: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v9
@@ -6439,29 +6475,29 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV64ZVE32F-NEXT: srli a7, a7, 29
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: .LBB56_11: # %cond.load7
; RV64ZVE32F-NEXT: andi t0, a5, 16
-; RV64ZVE32F-NEXT: bnez t0, .LBB56_16
-; RV64ZVE32F-NEXT: # %bb.12:
+; RV64ZVE32F-NEXT: bnez t0, .LBB56_13
+; RV64ZVE32F-NEXT: .LBB56_10:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: .LBB56_13:
; RV64ZVE32F-NEXT: andi t1, a5, 32
-; RV64ZVE32F-NEXT: bnez t1, .LBB56_17
-; RV64ZVE32F-NEXT: # %bb.14:
+; RV64ZVE32F-NEXT: bnez t1, .LBB56_14
+; RV64ZVE32F-NEXT: .LBB56_11:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
-; RV64ZVE32F-NEXT: j .LBB56_18
-; RV64ZVE32F-NEXT: .LBB56_15:
+; RV64ZVE32F-NEXT: j .LBB56_15
+; RV64ZVE32F-NEXT: .LBB56_12:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
-; RV64ZVE32F-NEXT: j .LBB56_11
-; RV64ZVE32F-NEXT: .LBB56_16: # %cond.load10
+; RV64ZVE32F-NEXT: andi t0, a5, 16
+; RV64ZVE32F-NEXT: beqz t0, .LBB56_10
+; RV64ZVE32F-NEXT: .LBB56_13: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 32
; RV64ZVE32F-NEXT: srli t0, t0, 29
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: j .LBB56_13
-; RV64ZVE32F-NEXT: .LBB56_17: # %cond.load13
+; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: beqz t1, .LBB56_11
+; RV64ZVE32F-NEXT: .LBB56_14: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v9
@@ -6469,34 +6505,34 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV64ZVE32F-NEXT: srli t1, t1, 29
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: .LBB56_18: # %else14
+; RV64ZVE32F-NEXT: .LBB56_15: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz t2, .LBB56_22
-; RV64ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV64ZVE32F-NEXT: beqz t2, .LBB56_18
+; RV64ZVE32F-NEXT: # %bb.16: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 32
; RV64ZVE32F-NEXT: srli t2, t2, 29
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
-; RV64ZVE32F-NEXT: .LBB56_20: # %cond.load16
; RV64ZVE32F-NEXT: andi a5, a5, -128
-; RV64ZVE32F-NEXT: bnez a5, .LBB56_23
-; RV64ZVE32F-NEXT: # %bb.21:
+; RV64ZVE32F-NEXT: bnez a5, .LBB56_19
+; RV64ZVE32F-NEXT: .LBB56_17:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
-; RV64ZVE32F-NEXT: j .LBB56_24
-; RV64ZVE32F-NEXT: .LBB56_22:
-; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: j .LBB56_20
-; RV64ZVE32F-NEXT: .LBB56_23: # %cond.load19
+; RV64ZVE32F-NEXT: .LBB56_18:
+; RV64ZVE32F-NEXT: ld t2, 48(a2)
+; RV64ZVE32F-NEXT: andi a5, a5, -128
+; RV64ZVE32F-NEXT: beqz a5, .LBB56_17
+; RV64ZVE32F-NEXT: .LBB56_19: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB56_24: # %else20
+; RV64ZVE32F-NEXT: .LBB56_20: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -6556,81 +6592,81 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m,
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: andi a2, t0, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: beqz a2, .LBB57_12
+; RV32ZVE32F-NEXT: beqz a2, .LBB57_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: .LBB57_2: # %cond.load
; RV32ZVE32F-NEXT: andi a4, t0, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB57_13
-; RV32ZVE32F-NEXT: # %bb.3:
+; RV32ZVE32F-NEXT: bnez a4, .LBB57_8
+; RV32ZVE32F-NEXT: .LBB57_2:
; RV32ZVE32F-NEXT: lw a4, 8(a3)
; RV32ZVE32F-NEXT: lw a5, 12(a3)
-; RV32ZVE32F-NEXT: .LBB57_4:
; RV32ZVE32F-NEXT: andi a6, t0, 4
-; RV32ZVE32F-NEXT: bnez a6, .LBB57_14
-; RV32ZVE32F-NEXT: # %bb.5:
+; RV32ZVE32F-NEXT: bnez a6, .LBB57_9
+; RV32ZVE32F-NEXT: .LBB57_3:
; RV32ZVE32F-NEXT: lw a6, 16(a3)
; RV32ZVE32F-NEXT: lw a7, 20(a3)
-; RV32ZVE32F-NEXT: .LBB57_6:
; RV32ZVE32F-NEXT: andi t1, t0, 8
-; RV32ZVE32F-NEXT: bnez t1, .LBB57_15
-; RV32ZVE32F-NEXT: # %bb.7:
+; RV32ZVE32F-NEXT: bnez t1, .LBB57_10
+; RV32ZVE32F-NEXT: .LBB57_4:
; RV32ZVE32F-NEXT: lw t1, 24(a3)
; RV32ZVE32F-NEXT: lw t2, 28(a3)
-; RV32ZVE32F-NEXT: .LBB57_8:
; RV32ZVE32F-NEXT: andi t3, t0, 16
-; RV32ZVE32F-NEXT: bnez t3, .LBB57_16
-; RV32ZVE32F-NEXT: # %bb.9:
+; RV32ZVE32F-NEXT: bnez t3, .LBB57_11
+; RV32ZVE32F-NEXT: .LBB57_5:
; RV32ZVE32F-NEXT: lw t3, 32(a3)
; RV32ZVE32F-NEXT: lw t4, 36(a3)
-; RV32ZVE32F-NEXT: .LBB57_10:
; RV32ZVE32F-NEXT: andi t5, t0, 32
-; RV32ZVE32F-NEXT: bnez t5, .LBB57_17
-; RV32ZVE32F-NEXT: # %bb.11:
+; RV32ZVE32F-NEXT: bnez t5, .LBB57_12
+; RV32ZVE32F-NEXT: .LBB57_6:
; RV32ZVE32F-NEXT: lw t5, 40(a3)
; RV32ZVE32F-NEXT: lw t6, 44(a3)
-; RV32ZVE32F-NEXT: j .LBB57_18
-; RV32ZVE32F-NEXT: .LBB57_12:
+; RV32ZVE32F-NEXT: j .LBB57_13
+; RV32ZVE32F-NEXT: .LBB57_7:
; RV32ZVE32F-NEXT: lw a1, 0(a3)
; RV32ZVE32F-NEXT: lw a2, 4(a3)
-; RV32ZVE32F-NEXT: j .LBB57_2
-; RV32ZVE32F-NEXT: .LBB57_13: # %cond.load1
+; RV32ZVE32F-NEXT: andi a4, t0, 2
+; RV32ZVE32F-NEXT: beqz a4, .LBB57_2
+; RV32ZVE32F-NEXT: .LBB57_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: j .LBB57_4
-; RV32ZVE32F-NEXT: .LBB57_14: # %cond.load4
+; RV32ZVE32F-NEXT: andi a6, t0, 4
+; RV32ZVE32F-NEXT: beqz a6, .LBB57_3
+; RV32ZVE32F-NEXT: .LBB57_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 0(a7)
; RV32ZVE32F-NEXT: lw a7, 4(a7)
-; RV32ZVE32F-NEXT: j .LBB57_6
-; RV32ZVE32F-NEXT: .LBB57_15: # %cond.load7
+; RV32ZVE32F-NEXT: andi t1, t0, 8
+; RV32ZVE32F-NEXT: beqz t1, .LBB57_4
+; RV32ZVE32F-NEXT: .LBB57_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 0(t2)
; RV32ZVE32F-NEXT: lw t2, 4(t2)
-; RV32ZVE32F-NEXT: j .LBB57_8
-; RV32ZVE32F-NEXT: .LBB57_16: # %cond.load10
+; RV32ZVE32F-NEXT: andi t3, t0, 16
+; RV32ZVE32F-NEXT: beqz t3, .LBB57_5
+; RV32ZVE32F-NEXT: .LBB57_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 0(t4)
; RV32ZVE32F-NEXT: lw t4, 4(t4)
-; RV32ZVE32F-NEXT: j .LBB57_10
-; RV32ZVE32F-NEXT: .LBB57_17: # %cond.load13
+; RV32ZVE32F-NEXT: andi t5, t0, 32
+; RV32ZVE32F-NEXT: beqz t5, .LBB57_6
+; RV32ZVE32F-NEXT: .LBB57_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 0(t6)
; RV32ZVE32F-NEXT: lw t6, 4(t6)
-; RV32ZVE32F-NEXT: .LBB57_18: # %else14
+; RV32ZVE32F-NEXT: .LBB57_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
@@ -6638,31 +6674,31 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m,
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
-; RV32ZVE32F-NEXT: beqz s0, .LBB57_22
-; RV32ZVE32F-NEXT: # %bb.19: # %cond.load16
+; RV32ZVE32F-NEXT: beqz s0, .LBB57_16
+; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 0(s1)
; RV32ZVE32F-NEXT: lw s1, 4(s1)
-; RV32ZVE32F-NEXT: .LBB57_20: # %cond.load16
; RV32ZVE32F-NEXT: andi t0, t0, -128
-; RV32ZVE32F-NEXT: bnez t0, .LBB57_23
-; RV32ZVE32F-NEXT: # %bb.21:
+; RV32ZVE32F-NEXT: bnez t0, .LBB57_17
+; RV32ZVE32F-NEXT: .LBB57_15:
; RV32ZVE32F-NEXT: lw t0, 56(a3)
; RV32ZVE32F-NEXT: lw a3, 60(a3)
-; RV32ZVE32F-NEXT: j .LBB57_24
-; RV32ZVE32F-NEXT: .LBB57_22:
+; RV32ZVE32F-NEXT: j .LBB57_18
+; RV32ZVE32F-NEXT: .LBB57_16:
; RV32ZVE32F-NEXT: lw s0, 48(a3)
; RV32ZVE32F-NEXT: lw s1, 52(a3)
-; RV32ZVE32F-NEXT: j .LBB57_20
-; RV32ZVE32F-NEXT: .LBB57_23: # %cond.load19
+; RV32ZVE32F-NEXT: andi t0, t0, -128
+; RV32ZVE32F-NEXT: beqz t0, .LBB57_15
+; RV32ZVE32F-NEXT: .LBB57_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw t0, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
-; RV32ZVE32F-NEXT: .LBB57_24: # %else20
+; RV32ZVE32F-NEXT: .LBB57_18: # %else20
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a2, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
@@ -6692,93 +6728,93 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a7, v0
; RV64ZVE32F-NEXT: andi a4, a7, 1
-; RV64ZVE32F-NEXT: beqz a4, .LBB57_16
+; RV64ZVE32F-NEXT: beqz a4, .LBB57_9
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: ld a4, 0(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: .LBB57_2: # %cond.load
; RV64ZVE32F-NEXT: andi a5, a7, 2
-; RV64ZVE32F-NEXT: bnez a5, .LBB57_17
-; RV64ZVE32F-NEXT: # %bb.3:
+; RV64ZVE32F-NEXT: bnez a5, .LBB57_10
+; RV64ZVE32F-NEXT: .LBB57_2:
; RV64ZVE32F-NEXT: ld a5, 8(a3)
-; RV64ZVE32F-NEXT: .LBB57_4:
; RV64ZVE32F-NEXT: andi a6, a7, 4
-; RV64ZVE32F-NEXT: bnez a6, .LBB57_18
-; RV64ZVE32F-NEXT: # %bb.5:
+; RV64ZVE32F-NEXT: bnez a6, .LBB57_11
+; RV64ZVE32F-NEXT: .LBB57_3:
; RV64ZVE32F-NEXT: ld a6, 16(a3)
-; RV64ZVE32F-NEXT: .LBB57_6:
; RV64ZVE32F-NEXT: andi t0, a7, 8
-; RV64ZVE32F-NEXT: bnez t0, .LBB57_19
-; RV64ZVE32F-NEXT: # %bb.7:
+; RV64ZVE32F-NEXT: bnez t0, .LBB57_12
+; RV64ZVE32F-NEXT: .LBB57_4:
; RV64ZVE32F-NEXT: ld t0, 24(a3)
-; RV64ZVE32F-NEXT: .LBB57_8:
; RV64ZVE32F-NEXT: andi t1, a7, 16
-; RV64ZVE32F-NEXT: bnez t1, .LBB57_20
-; RV64ZVE32F-NEXT: # %bb.9:
+; RV64ZVE32F-NEXT: bnez t1, .LBB57_13
+; RV64ZVE32F-NEXT: .LBB57_5:
; RV64ZVE32F-NEXT: ld t1, 32(a3)
-; RV64ZVE32F-NEXT: .LBB57_10:
; RV64ZVE32F-NEXT: andi t2, a7, 32
-; RV64ZVE32F-NEXT: bnez t2, .LBB57_21
-; RV64ZVE32F-NEXT: # %bb.11:
+; RV64ZVE32F-NEXT: bnez t2, .LBB57_14
+; RV64ZVE32F-NEXT: .LBB57_6:
; RV64ZVE32F-NEXT: ld t2, 40(a3)
-; RV64ZVE32F-NEXT: .LBB57_12:
; RV64ZVE32F-NEXT: andi t3, a7, 64
-; RV64ZVE32F-NEXT: bnez t3, .LBB57_22
-; RV64ZVE32F-NEXT: # %bb.13:
+; RV64ZVE32F-NEXT: bnez t3, .LBB57_15
+; RV64ZVE32F-NEXT: .LBB57_7:
; RV64ZVE32F-NEXT: ld t3, 48(a3)
-; RV64ZVE32F-NEXT: .LBB57_14:
; RV64ZVE32F-NEXT: andi a7, a7, -128
-; RV64ZVE32F-NEXT: bnez a7, .LBB57_23
-; RV64ZVE32F-NEXT: # %bb.15:
+; RV64ZVE32F-NEXT: bnez a7, .LBB57_16
+; RV64ZVE32F-NEXT: .LBB57_8:
; RV64ZVE32F-NEXT: ld a1, 56(a3)
-; RV64ZVE32F-NEXT: j .LBB57_24
-; RV64ZVE32F-NEXT: .LBB57_16:
+; RV64ZVE32F-NEXT: j .LBB57_17
+; RV64ZVE32F-NEXT: .LBB57_9:
; RV64ZVE32F-NEXT: ld a4, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB57_2
-; RV64ZVE32F-NEXT: .LBB57_17: # %cond.load1
+; RV64ZVE32F-NEXT: andi a5, a7, 2
+; RV64ZVE32F-NEXT: beqz a5, .LBB57_2
+; RV64ZVE32F-NEXT: .LBB57_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a5, 8(a2)
; RV64ZVE32F-NEXT: slli a5, a5, 3
; RV64ZVE32F-NEXT: add a5, a1, a5
; RV64ZVE32F-NEXT: ld a5, 0(a5)
-; RV64ZVE32F-NEXT: j .LBB57_4
-; RV64ZVE32F-NEXT: .LBB57_18: # %cond.load4
+; RV64ZVE32F-NEXT: andi a6, a7, 4
+; RV64ZVE32F-NEXT: beqz a6, .LBB57_3
+; RV64ZVE32F-NEXT: .LBB57_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
-; RV64ZVE32F-NEXT: j .LBB57_6
-; RV64ZVE32F-NEXT: .LBB57_19: # %cond.load7
+; RV64ZVE32F-NEXT: andi t0, a7, 8
+; RV64ZVE32F-NEXT: beqz t0, .LBB57_4
+; RV64ZVE32F-NEXT: .LBB57_12: # %cond.load7
; RV64ZVE32F-NEXT: ld t0, 24(a2)
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: j .LBB57_8
-; RV64ZVE32F-NEXT: .LBB57_20: # %cond.load10
+; RV64ZVE32F-NEXT: andi t1, a7, 16
+; RV64ZVE32F-NEXT: beqz t1, .LBB57_5
+; RV64ZVE32F-NEXT: .LBB57_13: # %cond.load10
; RV64ZVE32F-NEXT: ld t1, 32(a2)
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: j .LBB57_10
-; RV64ZVE32F-NEXT: .LBB57_21: # %cond.load13
+; RV64ZVE32F-NEXT: andi t2, a7, 32
+; RV64ZVE32F-NEXT: beqz t2, .LBB57_6
+; RV64ZVE32F-NEXT: .LBB57_14: # %cond.load13
; RV64ZVE32F-NEXT: ld t2, 40(a2)
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
-; RV64ZVE32F-NEXT: j .LBB57_12
-; RV64ZVE32F-NEXT: .LBB57_22: # %cond.load16
+; RV64ZVE32F-NEXT: andi t3, a7, 64
+; RV64ZVE32F-NEXT: beqz t3, .LBB57_7
+; RV64ZVE32F-NEXT: .LBB57_15: # %cond.load16
; RV64ZVE32F-NEXT: ld t3, 48(a2)
; RV64ZVE32F-NEXT: slli t3, t3, 3
; RV64ZVE32F-NEXT: add t3, a1, t3
; RV64ZVE32F-NEXT: ld t3, 0(t3)
-; RV64ZVE32F-NEXT: j .LBB57_14
-; RV64ZVE32F-NEXT: .LBB57_23: # %cond.load19
+; RV64ZVE32F-NEXT: andi a7, a7, -128
+; RV64ZVE32F-NEXT: beqz a7, .LBB57_8
+; RV64ZVE32F-NEXT: .LBB57_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a2, 56(a2)
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB57_24: # %else20
+; RV64ZVE32F-NEXT: .LBB57_17: # %else20
; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: sd a5, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
@@ -6861,16 +6897,17 @@ define <2 x bfloat> @mgather_v2bf16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x bfloat> %
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB59_3
-; RV64ZVE32F-NEXT: .LBB59_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB59_4
-; RV64ZVE32F-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-NEXT: .LBB59_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB59_3: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
-; RV64ZVE32F-NEXT: j .LBB59_1
+; RV64ZVE32F-NEXT: andi a2, a2, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB59_2
; RV64ZVE32F-NEXT: .LBB59_4: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
@@ -6905,7 +6942,7 @@ define <4 x bfloat> @mgather_v4bf16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x bfloat> %
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB60_5
-; RV64ZVE32F-NEXT: .LBB60_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB60_6
; RV64ZVE32F-NEXT: .LBB60_2: # %else2
@@ -6914,14 +6951,15 @@ define <4 x bfloat> @mgather_v4bf16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x bfloat> %
; RV64ZVE32F-NEXT: .LBB60_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB60_8
-; RV64ZVE32F-NEXT: # %bb.4: # %else8
+; RV64ZVE32F-NEXT: .LBB60_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB60_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: j .LBB60_1
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB60_2
; RV64ZVE32F-NEXT: .LBB60_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
@@ -6929,14 +6967,16 @@ define <4 x bfloat> @mgather_v4bf16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x bfloat> %
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: j .LBB60_2
+; RV64ZVE32F-NEXT: andi a2, a1, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB60_3
; RV64ZVE32F-NEXT: .LBB60_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: j .LBB60_3
+; RV64ZVE32F-NEXT: andi a1, a1, 8
+; RV64ZVE32F-NEXT: beqz a1, .LBB60_4
; RV64ZVE32F-NEXT: .LBB60_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lh a0, 0(a0)
@@ -7026,7 +7066,7 @@ define <8 x bfloat> @mgather_v8bf16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x bfloat> %
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB63_9
-; RV64ZVE32F-NEXT: .LBB63_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB63_10
; RV64ZVE32F-NEXT: .LBB63_2: # %else2
@@ -7047,14 +7087,15 @@ define <8 x bfloat> @mgather_v8bf16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x bfloat> %
; RV64ZVE32F-NEXT: .LBB63_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB63_16
-; RV64ZVE32F-NEXT: # %bb.8: # %else20
+; RV64ZVE32F-NEXT: .LBB63_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB63_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: j .LBB63_1
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB63_2
; RV64ZVE32F-NEXT: .LBB63_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
@@ -7062,42 +7103,48 @@ define <8 x bfloat> @mgather_v8bf16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x bfloat> %
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: j .LBB63_2
+; RV64ZVE32F-NEXT: andi a2, a1, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB63_3
; RV64ZVE32F-NEXT: .LBB63_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: j .LBB63_3
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB63_4
; RV64ZVE32F-NEXT: .LBB63_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
-; RV64ZVE32F-NEXT: j .LBB63_4
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: beqz a2, .LBB63_5
; RV64ZVE32F-NEXT: .LBB63_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
-; RV64ZVE32F-NEXT: j .LBB63_5
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB63_6
; RV64ZVE32F-NEXT: .LBB63_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
-; RV64ZVE32F-NEXT: j .LBB63_6
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: beqz a2, .LBB63_7
; RV64ZVE32F-NEXT: .LBB63_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
-; RV64ZVE32F-NEXT: j .LBB63_7
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB63_8
; RV64ZVE32F-NEXT: .LBB63_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: lh a0, 0(a0)
@@ -7740,16 +7787,17 @@ define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passt
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-ZVFH-NEXT: andi a3, a2, 1
; RV64ZVE32F-ZVFH-NEXT: bnez a3, .LBB69_3
-; RV64ZVE32F-ZVFH-NEXT: .LBB69_1: # %else
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_4
-; RV64ZVE32F-ZVFH-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_2: # %else2
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB69_3: # %cond.load
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-ZVFH-NEXT: j .LBB69_1
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_2
; RV64ZVE32F-ZVFH-NEXT: .LBB69_4: # %cond.load1
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a1)
; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
@@ -7764,16 +7812,17 @@ define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passt
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a3, .LBB69_3
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_1: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_4
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_2: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_3: # %cond.load
; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB69_1
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_2
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_4: # %cond.load1
; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a1)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
@@ -7808,7 +7857,7 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_5
-; RV64ZVE32F-ZVFH-NEXT: .LBB70_1: # %else
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_6
; RV64ZVE32F-ZVFH-NEXT: .LBB70_2: # %else2
@@ -7817,14 +7866,15 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt
; RV64ZVE32F-ZVFH-NEXT: .LBB70_3: # %else5
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB70_8
-; RV64ZVE32F-ZVFH-NEXT: # %bb.4: # %else8
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_4: # %else8
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB70_5: # %cond.load
; RV64ZVE32F-ZVFH-NEXT: ld a2, 0(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-ZVFH-NEXT: j .LBB70_1
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_2
; RV64ZVE32F-ZVFH-NEXT: .LBB70_6: # %cond.load1
; RV64ZVE32F-ZVFH-NEXT: ld a2, 8(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
@@ -7832,14 +7882,16 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-ZVFH-NEXT: j .LBB70_2
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_3
; RV64ZVE32F-ZVFH-NEXT: .LBB70_7: # %cond.load4
; RV64ZVE32F-ZVFH-NEXT: ld a2, 16(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-ZVFH-NEXT: j .LBB70_3
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB70_4
; RV64ZVE32F-ZVFH-NEXT: .LBB70_8: # %cond.load7
; RV64ZVE32F-ZVFH-NEXT: ld a0, 24(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
@@ -7854,7 +7906,7 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_5
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_1: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_2: # %else2
@@ -7863,14 +7915,15 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_3: # %else5
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_8
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.4: # %else8
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_4: # %else8
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_5: # %cond.load
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_1
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_2
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_6: # %cond.load1
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
@@ -7878,14 +7931,16 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_2
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_3
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_7: # %cond.load4
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 16(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_3
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB70_4
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_8: # %cond.load7
; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 24(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
@@ -7992,7 +8047,7 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_9
-; RV64ZVE32F-ZVFH-NEXT: .LBB73_1: # %else
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_10
; RV64ZVE32F-ZVFH-NEXT: .LBB73_2: # %else2
@@ -8013,14 +8068,15 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt
; RV64ZVE32F-ZVFH-NEXT: .LBB73_7: # %else17
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB73_16
-; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %else20
+; RV64ZVE32F-ZVFH-NEXT: .LBB73_8: # %else20
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB73_9: # %cond.load
; RV64ZVE32F-ZVFH-NEXT: ld a2, 0(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-ZVFH-NEXT: j .LBB73_1
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_2
; RV64ZVE32F-ZVFH-NEXT: .LBB73_10: # %cond.load1
; RV64ZVE32F-ZVFH-NEXT: ld a2, 8(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
@@ -8028,42 +8084,48 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-ZVFH-NEXT: j .LBB73_2
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_3
; RV64ZVE32F-ZVFH-NEXT: .LBB73_11: # %cond.load4
; RV64ZVE32F-ZVFH-NEXT: ld a2, 16(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-ZVFH-NEXT: j .LBB73_3
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_4
; RV64ZVE32F-ZVFH-NEXT: .LBB73_12: # %cond.load7
; RV64ZVE32F-ZVFH-NEXT: ld a2, 24(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 3
-; RV64ZVE32F-ZVFH-NEXT: j .LBB73_4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_5
; RV64ZVE32F-ZVFH-NEXT: .LBB73_13: # %cond.load10
; RV64ZVE32F-ZVFH-NEXT: ld a2, 32(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 4
-; RV64ZVE32F-ZVFH-NEXT: j .LBB73_5
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_6
; RV64ZVE32F-ZVFH-NEXT: .LBB73_14: # %cond.load13
; RV64ZVE32F-ZVFH-NEXT: ld a2, 40(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 5
-; RV64ZVE32F-ZVFH-NEXT: j .LBB73_6
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_7
; RV64ZVE32F-ZVFH-NEXT: .LBB73_15: # %cond.load16
; RV64ZVE32F-ZVFH-NEXT: ld a2, 48(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 6
-; RV64ZVE32F-ZVFH-NEXT: j .LBB73_7
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB73_8
; RV64ZVE32F-ZVFH-NEXT: .LBB73_16: # %cond.load19
; RV64ZVE32F-ZVFH-NEXT: ld a0, 56(a0)
; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
@@ -8078,7 +8140,7 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_9
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_1: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_10
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_2: # %else2
@@ -8099,14 +8161,15 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_7: # %else17
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_16
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %else20
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_8: # %else20
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_9: # %cond.load
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB73_1
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_2
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_10: # %cond.load1
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
@@ -8114,42 +8177,48 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB73_2
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_3
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_11: # %cond.load4
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 16(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB73_3
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_4
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_12: # %cond.load7
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 24(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 3
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB73_4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_5
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_13: # %cond.load10
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 32(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 4
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB73_5
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_14: # %cond.load13
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 40(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 5
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB73_6
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_15: # %cond.load16
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 48(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 6
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB73_7
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB73_8
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_16: # %cond.load19
; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 56(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
@@ -9257,16 +9326,17 @@ define <2 x float> @mgather_v2f32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x float> %pas
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB79_3
-; RV64ZVE32F-NEXT: .LBB79_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB79_4
-; RV64ZVE32F-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-NEXT: .LBB79_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB79_3: # %cond.load
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: j .LBB79_1
+; RV64ZVE32F-NEXT: andi a2, a2, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB79_2
; RV64ZVE32F-NEXT: .LBB79_4: # %cond.load1
; RV64ZVE32F-NEXT: flw fa5, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -9301,7 +9371,7 @@ define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %pas
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB80_5
-; RV64ZVE32F-NEXT: .LBB80_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB80_6
; RV64ZVE32F-NEXT: .LBB80_2: # %else2
@@ -9310,14 +9380,15 @@ define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %pas
; RV64ZVE32F-NEXT: .LBB80_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB80_8
-; RV64ZVE32F-NEXT: # %bb.4: # %else8
+; RV64ZVE32F-NEXT: .LBB80_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB80_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: j .LBB80_1
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB80_2
; RV64ZVE32F-NEXT: .LBB80_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
@@ -9325,14 +9396,16 @@ define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %pas
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: j .LBB80_2
+; RV64ZVE32F-NEXT: andi a2, a1, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB80_3
; RV64ZVE32F-NEXT: .LBB80_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: j .LBB80_3
+; RV64ZVE32F-NEXT: andi a1, a1, 8
+; RV64ZVE32F-NEXT: beqz a1, .LBB80_4
; RV64ZVE32F-NEXT: .LBB80_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
@@ -9421,7 +9494,7 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB83_9
-; RV64ZVE32F-NEXT: .LBB83_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB83_10
; RV64ZVE32F-NEXT: .LBB83_2: # %else2
@@ -9442,14 +9515,15 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas
; RV64ZVE32F-NEXT: .LBB83_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB83_16
-; RV64ZVE32F-NEXT: # %bb.8: # %else20
+; RV64ZVE32F-NEXT: .LBB83_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB83_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: j .LBB83_1
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_2
; RV64ZVE32F-NEXT: .LBB83_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
@@ -9457,42 +9531,48 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1
-; RV64ZVE32F-NEXT: j .LBB83_2
+; RV64ZVE32F-NEXT: andi a2, a1, 4
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_3
; RV64ZVE32F-NEXT: .LBB83_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2
-; RV64ZVE32F-NEXT: j .LBB83_3
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_4
; RV64ZVE32F-NEXT: .LBB83_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3
-; RV64ZVE32F-NEXT: j .LBB83_4
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_5
; RV64ZVE32F-NEXT: .LBB83_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4
-; RV64ZVE32F-NEXT: j .LBB83_5
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_6
; RV64ZVE32F-NEXT: .LBB83_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5
-; RV64ZVE32F-NEXT: j .LBB83_6
+; RV64ZVE32F-NEXT: andi a2, a1, 64
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_7
; RV64ZVE32F-NEXT: .LBB83_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6
-; RV64ZVE32F-NEXT: j .LBB83_7
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB83_8
; RV64ZVE32F-NEXT: .LBB83_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
@@ -10432,13 +10512,13 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB90_15
-; RV64ZVE32F-NEXT: .LBB90_7: # %else8
+; RV64ZVE32F-NEXT: # %bb.7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB90_16
; RV64ZVE32F-NEXT: .LBB90_8: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB90_10
-; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
+; RV64ZVE32F-NEXT: .LBB90_9: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -10487,7 +10567,8 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 3
-; RV64ZVE32F-NEXT: j .LBB90_7
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: beqz a2, .LBB90_8
; RV64ZVE32F-NEXT: .LBB90_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -10496,7 +10577,9 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: j .LBB90_8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: bnez a2, .LBB90_9
+; RV64ZVE32F-NEXT: j .LBB90_10
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
@@ -10567,16 +10650,17 @@ define <2 x double> @mgather_v2f64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x double> %p
; RV32ZVE32F-NEXT: vmv.x.s a0, v0
; RV32ZVE32F-NEXT: andi a1, a0, 1
; RV32ZVE32F-NEXT: bnez a1, .LBB92_3
-; RV32ZVE32F-NEXT: .LBB92_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB92_4
-; RV32ZVE32F-NEXT: # %bb.2: # %else2
+; RV32ZVE32F-NEXT: .LBB92_2: # %else2
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB92_3: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB92_1
+; RV32ZVE32F-NEXT: andi a0, a0, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB92_2
; RV32ZVE32F-NEXT: .LBB92_4: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -10590,14 +10674,15 @@ define <2 x double> @mgather_v2f64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x double> %p
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB92_3
-; RV64ZVE32F-NEXT: .LBB92_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB92_4
-; RV64ZVE32F-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-NEXT: .LBB92_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB92_3: # %cond.load
; RV64ZVE32F-NEXT: fld fa0, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB92_1
+; RV64ZVE32F-NEXT: andi a2, a2, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB92_2
; RV64ZVE32F-NEXT: .LBB92_4: # %cond.load1
; RV64ZVE32F-NEXT: fld fa1, 0(a1)
; RV64ZVE32F-NEXT: ret
@@ -10628,7 +10713,7 @@ define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %p
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB93_6
-; RV32ZVE32F-NEXT: .LBB93_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB93_7
; RV32ZVE32F-NEXT: .LBB93_2: # %else2
@@ -10637,7 +10722,7 @@ define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %p
; RV32ZVE32F-NEXT: .LBB93_3: # %else5
; RV32ZVE32F-NEXT: andi a1, a1, 8
; RV32ZVE32F-NEXT: beqz a1, .LBB93_5
-; RV32ZVE32F-NEXT: # %bb.4: # %cond.load7
+; RV32ZVE32F-NEXT: .LBB93_4: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -10652,19 +10737,23 @@ define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %p
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB93_1
+; RV32ZVE32F-NEXT: andi a2, a1, 2
+; RV32ZVE32F-NEXT: beqz a2, .LBB93_2
; RV32ZVE32F-NEXT: .LBB93_7: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB93_2
+; RV32ZVE32F-NEXT: andi a2, a1, 4
+; RV32ZVE32F-NEXT: beqz a2, .LBB93_3
; RV32ZVE32F-NEXT: .LBB93_8: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB93_3
+; RV32ZVE32F-NEXT: andi a1, a1, 8
+; RV32ZVE32F-NEXT: bnez a1, .LBB93_4
+; RV32ZVE32F-NEXT: j .LBB93_5
;
; RV64ZVE32F-LABEL: mgather_v4f64:
; RV64ZVE32F: # %bb.0:
@@ -10672,7 +10761,7 @@ define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %p
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB93_6
-; RV64ZVE32F-NEXT: .LBB93_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB93_7
; RV64ZVE32F-NEXT: .LBB93_2: # %else2
@@ -10681,7 +10770,7 @@ define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %p
; RV64ZVE32F-NEXT: .LBB93_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a2, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB93_5
-; RV64ZVE32F-NEXT: # %bb.4: # %cond.load7
+; RV64ZVE32F-NEXT: .LBB93_4: # %cond.load7
; RV64ZVE32F-NEXT: ld a1, 24(a1)
; RV64ZVE32F-NEXT: fld fa3, 0(a1)
; RV64ZVE32F-NEXT: .LBB93_5: # %else8
@@ -10693,15 +10782,19 @@ define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %p
; RV64ZVE32F-NEXT: .LBB93_6: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB93_1
+; RV64ZVE32F-NEXT: andi a3, a2, 2
+; RV64ZVE32F-NEXT: beqz a3, .LBB93_2
; RV64ZVE32F-NEXT: .LBB93_7: # %cond.load1
; RV64ZVE32F-NEXT: ld a3, 8(a1)
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB93_2
+; RV64ZVE32F-NEXT: andi a3, a2, 4
+; RV64ZVE32F-NEXT: beqz a3, .LBB93_3
; RV64ZVE32F-NEXT: .LBB93_8: # %cond.load4
; RV64ZVE32F-NEXT: ld a3, 16(a1)
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB93_3
+; RV64ZVE32F-NEXT: andi a2, a2, 8
+; RV64ZVE32F-NEXT: bnez a2, .LBB93_4
+; RV64ZVE32F-NEXT: j .LBB93_5
%v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %m, <4 x double> %passthru)
ret <4 x double> %v
}
@@ -10814,7 +10907,7 @@ define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %p
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB96_10
-; RV32ZVE32F-NEXT: .LBB96_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB96_11
; RV32ZVE32F-NEXT: .LBB96_2: # %else2
@@ -10835,7 +10928,7 @@ define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %p
; RV32ZVE32F-NEXT: .LBB96_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB96_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
+; RV32ZVE32F-NEXT: .LBB96_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -10854,43 +10947,51 @@ define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %p
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB96_1
+; RV32ZVE32F-NEXT: andi a2, a1, 2
+; RV32ZVE32F-NEXT: beqz a2, .LBB96_2
; RV32ZVE32F-NEXT: .LBB96_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB96_2
+; RV32ZVE32F-NEXT: andi a2, a1, 4
+; RV32ZVE32F-NEXT: beqz a2, .LBB96_3
; RV32ZVE32F-NEXT: .LBB96_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB96_3
+; RV32ZVE32F-NEXT: andi a2, a1, 8
+; RV32ZVE32F-NEXT: beqz a2, .LBB96_4
; RV32ZVE32F-NEXT: .LBB96_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB96_4
+; RV32ZVE32F-NEXT: andi a2, a1, 16
+; RV32ZVE32F-NEXT: beqz a2, .LBB96_5
; RV32ZVE32F-NEXT: .LBB96_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB96_5
+; RV32ZVE32F-NEXT: andi a2, a1, 32
+; RV32ZVE32F-NEXT: beqz a2, .LBB96_6
; RV32ZVE32F-NEXT: .LBB96_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB96_6
+; RV32ZVE32F-NEXT: andi a2, a1, 64
+; RV32ZVE32F-NEXT: beqz a2, .LBB96_7
; RV32ZVE32F-NEXT: .LBB96_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB96_7
+; RV32ZVE32F-NEXT: andi a1, a1, -128
+; RV32ZVE32F-NEXT: bnez a1, .LBB96_8
+; RV32ZVE32F-NEXT: j .LBB96_9
;
; RV64ZVE32F-LABEL: mgather_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -10898,7 +10999,7 @@ define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %p
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB96_10
-; RV64ZVE32F-NEXT: .LBB96_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB96_11
; RV64ZVE32F-NEXT: .LBB96_2: # %else2
@@ -10919,7 +11020,7 @@ define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %p
; RV64ZVE32F-NEXT: .LBB96_7: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB96_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load19
+; RV64ZVE32F-NEXT: .LBB96_8: # %cond.load19
; RV64ZVE32F-NEXT: ld a1, 56(a1)
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB96_9: # %else20
@@ -10935,31 +11036,39 @@ define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %p
; RV64ZVE32F-NEXT: .LBB96_10: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB96_1
+; RV64ZVE32F-NEXT: andi a3, a2, 2
+; RV64ZVE32F-NEXT: beqz a3, .LBB96_2
; RV64ZVE32F-NEXT: .LBB96_11: # %cond.load1
; RV64ZVE32F-NEXT: ld a3, 8(a1)
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB96_2
+; RV64ZVE32F-NEXT: andi a3, a2, 4
+; RV64ZVE32F-NEXT: beqz a3, .LBB96_3
; RV64ZVE32F-NEXT: .LBB96_12: # %cond.load4
; RV64ZVE32F-NEXT: ld a3, 16(a1)
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB96_3
+; RV64ZVE32F-NEXT: andi a3, a2, 8
+; RV64ZVE32F-NEXT: beqz a3, .LBB96_4
; RV64ZVE32F-NEXT: .LBB96_13: # %cond.load7
; RV64ZVE32F-NEXT: ld a3, 24(a1)
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB96_4
+; RV64ZVE32F-NEXT: andi a3, a2, 16
+; RV64ZVE32F-NEXT: beqz a3, .LBB96_5
; RV64ZVE32F-NEXT: .LBB96_14: # %cond.load10
; RV64ZVE32F-NEXT: ld a3, 32(a1)
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB96_5
+; RV64ZVE32F-NEXT: andi a3, a2, 32
+; RV64ZVE32F-NEXT: beqz a3, .LBB96_6
; RV64ZVE32F-NEXT: .LBB96_15: # %cond.load13
; RV64ZVE32F-NEXT: ld a3, 40(a1)
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB96_6
+; RV64ZVE32F-NEXT: andi a3, a2, 64
+; RV64ZVE32F-NEXT: beqz a3, .LBB96_7
; RV64ZVE32F-NEXT: .LBB96_16: # %cond.load16
; RV64ZVE32F-NEXT: ld a3, 48(a1)
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB96_7
+; RV64ZVE32F-NEXT: andi a2, a2, -128
+; RV64ZVE32F-NEXT: bnez a2, .LBB96_8
+; RV64ZVE32F-NEXT: j .LBB96_9
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
@@ -10995,7 +11104,7 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x
; RV32ZVE32F-NEXT: andi a3, a2, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez a3, .LBB97_10
-; RV32ZVE32F-NEXT: .LBB97_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB97_11
; RV32ZVE32F-NEXT: .LBB97_2: # %else2
@@ -11016,7 +11125,7 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x
; RV32ZVE32F-NEXT: .LBB97_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB97_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
+; RV32ZVE32F-NEXT: .LBB97_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -11034,43 +11143,51 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x
; RV32ZVE32F-NEXT: .LBB97_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB97_1
+; RV32ZVE32F-NEXT: andi a1, a2, 2
+; RV32ZVE32F-NEXT: beqz a1, .LBB97_2
; RV32ZVE32F-NEXT: .LBB97_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB97_2
+; RV32ZVE32F-NEXT: andi a1, a2, 4
+; RV32ZVE32F-NEXT: beqz a1, .LBB97_3
; RV32ZVE32F-NEXT: .LBB97_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB97_3
+; RV32ZVE32F-NEXT: andi a1, a2, 8
+; RV32ZVE32F-NEXT: beqz a1, .LBB97_4
; RV32ZVE32F-NEXT: .LBB97_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB97_4
+; RV32ZVE32F-NEXT: andi a1, a2, 16
+; RV32ZVE32F-NEXT: beqz a1, .LBB97_5
; RV32ZVE32F-NEXT: .LBB97_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB97_5
+; RV32ZVE32F-NEXT: andi a1, a2, 32
+; RV32ZVE32F-NEXT: beqz a1, .LBB97_6
; RV32ZVE32F-NEXT: .LBB97_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB97_6
+; RV32ZVE32F-NEXT: andi a1, a2, 64
+; RV32ZVE32F-NEXT: beqz a1, .LBB97_7
; RV32ZVE32F-NEXT: .LBB97_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB97_7
+; RV32ZVE32F-NEXT: andi a1, a2, -128
+; RV32ZVE32F-NEXT: bnez a1, .LBB97_8
+; RV32ZVE32F-NEXT: j .LBB97_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -11197,7 +11314,7 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV32ZVE32F-NEXT: andi a3, a2, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez a3, .LBB98_10
-; RV32ZVE32F-NEXT: .LBB98_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB98_11
; RV32ZVE32F-NEXT: .LBB98_2: # %else2
@@ -11218,7 +11335,7 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV32ZVE32F-NEXT: .LBB98_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB98_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
+; RV32ZVE32F-NEXT: .LBB98_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -11236,43 +11353,51 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV32ZVE32F-NEXT: .LBB98_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB98_1
+; RV32ZVE32F-NEXT: andi a1, a2, 2
+; RV32ZVE32F-NEXT: beqz a1, .LBB98_2
; RV32ZVE32F-NEXT: .LBB98_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB98_2
+; RV32ZVE32F-NEXT: andi a1, a2, 4
+; RV32ZVE32F-NEXT: beqz a1, .LBB98_3
; RV32ZVE32F-NEXT: .LBB98_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB98_3
+; RV32ZVE32F-NEXT: andi a1, a2, 8
+; RV32ZVE32F-NEXT: beqz a1, .LBB98_4
; RV32ZVE32F-NEXT: .LBB98_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB98_4
+; RV32ZVE32F-NEXT: andi a1, a2, 16
+; RV32ZVE32F-NEXT: beqz a1, .LBB98_5
; RV32ZVE32F-NEXT: .LBB98_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB98_5
+; RV32ZVE32F-NEXT: andi a1, a2, 32
+; RV32ZVE32F-NEXT: beqz a1, .LBB98_6
; RV32ZVE32F-NEXT: .LBB98_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB98_6
+; RV32ZVE32F-NEXT: andi a1, a2, 64
+; RV32ZVE32F-NEXT: beqz a1, .LBB98_7
; RV32ZVE32F-NEXT: .LBB98_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB98_7
+; RV32ZVE32F-NEXT: andi a1, a2, -128
+; RV32ZVE32F-NEXT: bnez a1, .LBB98_8
+; RV32ZVE32F-NEXT: j .LBB98_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -11401,7 +11526,7 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV32ZVE32F-NEXT: andi a3, a2, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez a3, .LBB99_10
-; RV32ZVE32F-NEXT: .LBB99_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB99_11
; RV32ZVE32F-NEXT: .LBB99_2: # %else2
@@ -11422,7 +11547,7 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV32ZVE32F-NEXT: .LBB99_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB99_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
+; RV32ZVE32F-NEXT: .LBB99_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -11440,43 +11565,51 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV32ZVE32F-NEXT: .LBB99_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB99_1
+; RV32ZVE32F-NEXT: andi a1, a2, 2
+; RV32ZVE32F-NEXT: beqz a1, .LBB99_2
; RV32ZVE32F-NEXT: .LBB99_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB99_2
+; RV32ZVE32F-NEXT: andi a1, a2, 4
+; RV32ZVE32F-NEXT: beqz a1, .LBB99_3
; RV32ZVE32F-NEXT: .LBB99_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB99_3
+; RV32ZVE32F-NEXT: andi a1, a2, 8
+; RV32ZVE32F-NEXT: beqz a1, .LBB99_4
; RV32ZVE32F-NEXT: .LBB99_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB99_4
+; RV32ZVE32F-NEXT: andi a1, a2, 16
+; RV32ZVE32F-NEXT: beqz a1, .LBB99_5
; RV32ZVE32F-NEXT: .LBB99_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB99_5
+; RV32ZVE32F-NEXT: andi a1, a2, 32
+; RV32ZVE32F-NEXT: beqz a1, .LBB99_6
; RV32ZVE32F-NEXT: .LBB99_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB99_6
+; RV32ZVE32F-NEXT: andi a1, a2, 64
+; RV32ZVE32F-NEXT: beqz a1, .LBB99_7
; RV32ZVE32F-NEXT: .LBB99_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB99_7
+; RV32ZVE32F-NEXT: andi a1, a2, -128
+; RV32ZVE32F-NEXT: bnez a1, .LBB99_8
+; RV32ZVE32F-NEXT: j .LBB99_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -11612,7 +11745,7 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccus.vx v10, a2, v8
; RV32ZVE32F-NEXT: bnez a3, .LBB100_10
-; RV32ZVE32F-NEXT: .LBB100_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB100_11
; RV32ZVE32F-NEXT: .LBB100_2: # %else2
@@ -11633,7 +11766,7 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8
; RV32ZVE32F-NEXT: .LBB100_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB100_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
+; RV32ZVE32F-NEXT: .LBB100_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -11652,43 +11785,51 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB100_1
+; RV32ZVE32F-NEXT: andi a2, a1, 2
+; RV32ZVE32F-NEXT: beqz a2, .LBB100_2
; RV32ZVE32F-NEXT: .LBB100_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB100_2
+; RV32ZVE32F-NEXT: andi a2, a1, 4
+; RV32ZVE32F-NEXT: beqz a2, .LBB100_3
; RV32ZVE32F-NEXT: .LBB100_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB100_3
+; RV32ZVE32F-NEXT: andi a2, a1, 8
+; RV32ZVE32F-NEXT: beqz a2, .LBB100_4
; RV32ZVE32F-NEXT: .LBB100_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB100_4
+; RV32ZVE32F-NEXT: andi a2, a1, 16
+; RV32ZVE32F-NEXT: beqz a2, .LBB100_5
; RV32ZVE32F-NEXT: .LBB100_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB100_5
+; RV32ZVE32F-NEXT: andi a2, a1, 32
+; RV32ZVE32F-NEXT: beqz a2, .LBB100_6
; RV32ZVE32F-NEXT: .LBB100_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB100_6
+; RV32ZVE32F-NEXT: andi a2, a1, 64
+; RV32ZVE32F-NEXT: beqz a2, .LBB100_7
; RV32ZVE32F-NEXT: .LBB100_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB100_7
+; RV32ZVE32F-NEXT: andi a1, a1, -128
+; RV32ZVE32F-NEXT: bnez a1, .LBB100_8
+; RV32ZVE32F-NEXT: j .LBB100_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -11816,7 +11957,7 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccus.vx v10, a2, v8
; RV32ZVE32F-NEXT: bnez a3, .LBB101_10
-; RV32ZVE32F-NEXT: .LBB101_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB101_11
; RV32ZVE32F-NEXT: .LBB101_2: # %else2
@@ -11837,7 +11978,7 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV32ZVE32F-NEXT: .LBB101_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB101_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
+; RV32ZVE32F-NEXT: .LBB101_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -11856,43 +11997,51 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB101_1
+; RV32ZVE32F-NEXT: andi a2, a1, 2
+; RV32ZVE32F-NEXT: beqz a2, .LBB101_2
; RV32ZVE32F-NEXT: .LBB101_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB101_2
+; RV32ZVE32F-NEXT: andi a2, a1, 4
+; RV32ZVE32F-NEXT: beqz a2, .LBB101_3
; RV32ZVE32F-NEXT: .LBB101_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB101_3
+; RV32ZVE32F-NEXT: andi a2, a1, 8
+; RV32ZVE32F-NEXT: beqz a2, .LBB101_4
; RV32ZVE32F-NEXT: .LBB101_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB101_4
+; RV32ZVE32F-NEXT: andi a2, a1, 16
+; RV32ZVE32F-NEXT: beqz a2, .LBB101_5
; RV32ZVE32F-NEXT: .LBB101_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB101_5
+; RV32ZVE32F-NEXT: andi a2, a1, 32
+; RV32ZVE32F-NEXT: beqz a2, .LBB101_6
; RV32ZVE32F-NEXT: .LBB101_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB101_6
+; RV32ZVE32F-NEXT: andi a2, a1, 64
+; RV32ZVE32F-NEXT: beqz a2, .LBB101_7
; RV32ZVE32F-NEXT: .LBB101_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB101_7
+; RV32ZVE32F-NEXT: andi a1, a1, -128
+; RV32ZVE32F-NEXT: bnez a1, .LBB101_8
+; RV32ZVE32F-NEXT: j .LBB101_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -12022,7 +12171,7 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccu.vx v10, a2, v8
; RV32ZVE32F-NEXT: bnez a3, .LBB102_10
-; RV32ZVE32F-NEXT: .LBB102_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB102_11
; RV32ZVE32F-NEXT: .LBB102_2: # %else2
@@ -12043,7 +12192,7 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV32ZVE32F-NEXT: .LBB102_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB102_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
+; RV32ZVE32F-NEXT: .LBB102_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -12062,43 +12211,51 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB102_1
+; RV32ZVE32F-NEXT: andi a2, a1, 2
+; RV32ZVE32F-NEXT: beqz a2, .LBB102_2
; RV32ZVE32F-NEXT: .LBB102_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB102_2
+; RV32ZVE32F-NEXT: andi a2, a1, 4
+; RV32ZVE32F-NEXT: beqz a2, .LBB102_3
; RV32ZVE32F-NEXT: .LBB102_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB102_3
+; RV32ZVE32F-NEXT: andi a2, a1, 8
+; RV32ZVE32F-NEXT: beqz a2, .LBB102_4
; RV32ZVE32F-NEXT: .LBB102_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB102_4
+; RV32ZVE32F-NEXT: andi a2, a1, 16
+; RV32ZVE32F-NEXT: beqz a2, .LBB102_5
; RV32ZVE32F-NEXT: .LBB102_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB102_5
+; RV32ZVE32F-NEXT: andi a2, a1, 32
+; RV32ZVE32F-NEXT: beqz a2, .LBB102_6
; RV32ZVE32F-NEXT: .LBB102_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB102_6
+; RV32ZVE32F-NEXT: andi a2, a1, 64
+; RV32ZVE32F-NEXT: beqz a2, .LBB102_7
; RV32ZVE32F-NEXT: .LBB102_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
-; RV32ZVE32F-NEXT: j .LBB102_7
+; RV32ZVE32F-NEXT: andi a1, a1, -128
+; RV32ZVE32F-NEXT: bnez a1, .LBB102_8
+; RV32ZVE32F-NEXT: j .LBB102_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -12234,7 +12391,7 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez a3, .LBB103_10
-; RV32ZVE32F-NEXT: .LBB103_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB103_11
; RV32ZVE32F-NEXT: .LBB103_2: # %else2
@@ -12255,7 +12412,7 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8
; RV32ZVE32F-NEXT: .LBB103_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB103_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
+; RV32ZVE32F-NEXT: .LBB103_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -12273,43 +12430,51 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8
; RV32ZVE32F-NEXT: .LBB103_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB103_1
+; RV32ZVE32F-NEXT: andi a1, a2, 2
+; RV32ZVE32F-NEXT: beqz a1, .LBB103_2
; RV32ZVE32F-NEXT: .LBB103_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB103_2
+; RV32ZVE32F-NEXT: andi a1, a2, 4
+; RV32ZVE32F-NEXT: beqz a1, .LBB103_3
; RV32ZVE32F-NEXT: .LBB103_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB103_3
+; RV32ZVE32F-NEXT: andi a1, a2, 8
+; RV32ZVE32F-NEXT: beqz a1, .LBB103_4
; RV32ZVE32F-NEXT: .LBB103_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB103_4
+; RV32ZVE32F-NEXT: andi a1, a2, 16
+; RV32ZVE32F-NEXT: beqz a1, .LBB103_5
; RV32ZVE32F-NEXT: .LBB103_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB103_5
+; RV32ZVE32F-NEXT: andi a1, a2, 32
+; RV32ZVE32F-NEXT: beqz a1, .LBB103_6
; RV32ZVE32F-NEXT: .LBB103_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB103_6
+; RV32ZVE32F-NEXT: andi a1, a2, 64
+; RV32ZVE32F-NEXT: beqz a1, .LBB103_7
; RV32ZVE32F-NEXT: .LBB103_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB103_7
+; RV32ZVE32F-NEXT: andi a1, a2, -128
+; RV32ZVE32F-NEXT: bnez a1, .LBB103_8
+; RV32ZVE32F-NEXT: j .LBB103_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i32_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -12348,13 +12513,13 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a3, .LBB103_15
-; RV64ZVE32F-NEXT: .LBB103_7: # %else8
+; RV64ZVE32F-NEXT: # %bb.7: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB103_16
; RV64ZVE32F-NEXT: .LBB103_8: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB103_10
-; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
+; RV64ZVE32F-NEXT: .LBB103_9: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
@@ -12397,14 +12562,17 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB103_7
+; RV64ZVE32F-NEXT: andi a3, a2, 16
+; RV64ZVE32F-NEXT: beqz a3, .LBB103_8
; RV64ZVE32F-NEXT: .LBB103_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB103_8
+; RV64ZVE32F-NEXT: andi a3, a2, 32
+; RV64ZVE32F-NEXT: bnez a3, .LBB103_9
+; RV64ZVE32F-NEXT: j .LBB103_10
%ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
@@ -12440,7 +12608,7 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez a3, .LBB104_10
-; RV32ZVE32F-NEXT: .LBB104_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB104_11
; RV32ZVE32F-NEXT: .LBB104_2: # %else2
@@ -12461,7 +12629,7 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV32ZVE32F-NEXT: .LBB104_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB104_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
+; RV32ZVE32F-NEXT: .LBB104_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -12479,43 +12647,51 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV32ZVE32F-NEXT: .LBB104_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB104_1
+; RV32ZVE32F-NEXT: andi a1, a2, 2
+; RV32ZVE32F-NEXT: beqz a1, .LBB104_2
; RV32ZVE32F-NEXT: .LBB104_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB104_2
+; RV32ZVE32F-NEXT: andi a1, a2, 4
+; RV32ZVE32F-NEXT: beqz a1, .LBB104_3
; RV32ZVE32F-NEXT: .LBB104_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB104_3
+; RV32ZVE32F-NEXT: andi a1, a2, 8
+; RV32ZVE32F-NEXT: beqz a1, .LBB104_4
; RV32ZVE32F-NEXT: .LBB104_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB104_4
+; RV32ZVE32F-NEXT: andi a1, a2, 16
+; RV32ZVE32F-NEXT: beqz a1, .LBB104_5
; RV32ZVE32F-NEXT: .LBB104_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB104_5
+; RV32ZVE32F-NEXT: andi a1, a2, 32
+; RV32ZVE32F-NEXT: beqz a1, .LBB104_6
; RV32ZVE32F-NEXT: .LBB104_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB104_6
+; RV32ZVE32F-NEXT: andi a1, a2, 64
+; RV32ZVE32F-NEXT: beqz a1, .LBB104_7
; RV32ZVE32F-NEXT: .LBB104_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB104_7
+; RV32ZVE32F-NEXT: andi a1, a2, -128
+; RV32ZVE32F-NEXT: bnez a1, .LBB104_8
+; RV32ZVE32F-NEXT: j .LBB104_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -12554,13 +12730,13 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a3, .LBB104_15
-; RV64ZVE32F-NEXT: .LBB104_7: # %else8
+; RV64ZVE32F-NEXT: # %bb.7: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB104_16
; RV64ZVE32F-NEXT: .LBB104_8: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB104_10
-; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
+; RV64ZVE32F-NEXT: .LBB104_9: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
@@ -12603,14 +12779,17 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB104_7
+; RV64ZVE32F-NEXT: andi a3, a2, 16
+; RV64ZVE32F-NEXT: beqz a3, .LBB104_8
; RV64ZVE32F-NEXT: .LBB104_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB104_8
+; RV64ZVE32F-NEXT: andi a3, a2, 32
+; RV64ZVE32F-NEXT: bnez a3, .LBB104_9
+; RV64ZVE32F-NEXT: j .LBB104_10
%eidxs = sext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -12647,7 +12826,7 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez a3, .LBB105_10
-; RV32ZVE32F-NEXT: .LBB105_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB105_11
; RV32ZVE32F-NEXT: .LBB105_2: # %else2
@@ -12668,7 +12847,7 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV32ZVE32F-NEXT: .LBB105_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB105_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
+; RV32ZVE32F-NEXT: .LBB105_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -12686,43 +12865,51 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV32ZVE32F-NEXT: .LBB105_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB105_1
+; RV32ZVE32F-NEXT: andi a1, a2, 2
+; RV32ZVE32F-NEXT: beqz a1, .LBB105_2
; RV32ZVE32F-NEXT: .LBB105_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB105_2
+; RV32ZVE32F-NEXT: andi a1, a2, 4
+; RV32ZVE32F-NEXT: beqz a1, .LBB105_3
; RV32ZVE32F-NEXT: .LBB105_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB105_3
+; RV32ZVE32F-NEXT: andi a1, a2, 8
+; RV32ZVE32F-NEXT: beqz a1, .LBB105_4
; RV32ZVE32F-NEXT: .LBB105_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB105_4
+; RV32ZVE32F-NEXT: andi a1, a2, 16
+; RV32ZVE32F-NEXT: beqz a1, .LBB105_5
; RV32ZVE32F-NEXT: .LBB105_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB105_5
+; RV32ZVE32F-NEXT: andi a1, a2, 32
+; RV32ZVE32F-NEXT: beqz a1, .LBB105_6
; RV32ZVE32F-NEXT: .LBB105_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB105_6
+; RV32ZVE32F-NEXT: andi a1, a2, 64
+; RV32ZVE32F-NEXT: beqz a1, .LBB105_7
; RV32ZVE32F-NEXT: .LBB105_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB105_7
+; RV32ZVE32F-NEXT: andi a1, a2, -128
+; RV32ZVE32F-NEXT: bnez a1, .LBB105_8
+; RV32ZVE32F-NEXT: j .LBB105_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -12764,13 +12951,13 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a3, .LBB105_15
-; RV64ZVE32F-NEXT: .LBB105_7: # %else8
+; RV64ZVE32F-NEXT: # %bb.7: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB105_16
; RV64ZVE32F-NEXT: .LBB105_8: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB105_10
-; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
+; RV64ZVE32F-NEXT: .LBB105_9: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
@@ -12817,7 +13004,8 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB105_7
+; RV64ZVE32F-NEXT: andi a3, a2, 16
+; RV64ZVE32F-NEXT: beqz a3, .LBB105_8
; RV64ZVE32F-NEXT: .LBB105_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
@@ -12825,7 +13013,9 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB105_8
+; RV64ZVE32F-NEXT: andi a3, a2, 32
+; RV64ZVE32F-NEXT: bnez a3, .LBB105_9
+; RV64ZVE32F-NEXT: j .LBB105_10
%eidxs = zext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -12877,7 +13067,7 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
; RV32ZVE32F-NEXT: andi a3, a2, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez a3, .LBB106_10
-; RV32ZVE32F-NEXT: .LBB106_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB106_11
; RV32ZVE32F-NEXT: .LBB106_2: # %else2
@@ -12898,7 +13088,7 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
; RV32ZVE32F-NEXT: .LBB106_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB106_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.load19
+; RV32ZVE32F-NEXT: .LBB106_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
@@ -12916,43 +13106,51 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
; RV32ZVE32F-NEXT: .LBB106_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB106_1
+; RV32ZVE32F-NEXT: andi a1, a2, 2
+; RV32ZVE32F-NEXT: beqz a1, .LBB106_2
; RV32ZVE32F-NEXT: .LBB106_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB106_2
+; RV32ZVE32F-NEXT: andi a1, a2, 4
+; RV32ZVE32F-NEXT: beqz a1, .LBB106_3
; RV32ZVE32F-NEXT: .LBB106_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB106_3
+; RV32ZVE32F-NEXT: andi a1, a2, 8
+; RV32ZVE32F-NEXT: beqz a1, .LBB106_4
; RV32ZVE32F-NEXT: .LBB106_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB106_4
+; RV32ZVE32F-NEXT: andi a1, a2, 16
+; RV32ZVE32F-NEXT: beqz a1, .LBB106_5
; RV32ZVE32F-NEXT: .LBB106_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB106_5
+; RV32ZVE32F-NEXT: andi a1, a2, 32
+; RV32ZVE32F-NEXT: beqz a1, .LBB106_6
; RV32ZVE32F-NEXT: .LBB106_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB106_6
+; RV32ZVE32F-NEXT: andi a1, a2, 64
+; RV32ZVE32F-NEXT: beqz a1, .LBB106_7
; RV32ZVE32F-NEXT: .LBB106_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB106_7
+; RV32ZVE32F-NEXT: andi a1, a2, -128
+; RV32ZVE32F-NEXT: bnez a1, .LBB106_8
+; RV32ZVE32F-NEXT: j .LBB106_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -12960,7 +13158,7 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a4, a3, 1
; RV64ZVE32F-NEXT: bnez a4, .LBB106_10
-; RV64ZVE32F-NEXT: .LBB106_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a4, a3, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB106_11
; RV64ZVE32F-NEXT: .LBB106_2: # %else2
@@ -12981,7 +13179,7 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: .LBB106_7: # %else17
; RV64ZVE32F-NEXT: andi a3, a3, -128
; RV64ZVE32F-NEXT: beqz a3, .LBB106_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.load19
+; RV64ZVE32F-NEXT: .LBB106_8: # %cond.load19
; RV64ZVE32F-NEXT: ld a2, 56(a2)
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
@@ -13001,43 +13199,51 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa0, 0(a4)
-; RV64ZVE32F-NEXT: j .LBB106_1
+; RV64ZVE32F-NEXT: andi a4, a3, 2
+; RV64ZVE32F-NEXT: beqz a4, .LBB106_2
; RV64ZVE32F-NEXT: .LBB106_11: # %cond.load1
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa1, 0(a4)
-; RV64ZVE32F-NEXT: j .LBB106_2
+; RV64ZVE32F-NEXT: andi a4, a3, 4
+; RV64ZVE32F-NEXT: beqz a4, .LBB106_3
; RV64ZVE32F-NEXT: .LBB106_12: # %cond.load4
; RV64ZVE32F-NEXT: ld a4, 16(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa2, 0(a4)
-; RV64ZVE32F-NEXT: j .LBB106_3
+; RV64ZVE32F-NEXT: andi a4, a3, 8
+; RV64ZVE32F-NEXT: beqz a4, .LBB106_4
; RV64ZVE32F-NEXT: .LBB106_13: # %cond.load7
; RV64ZVE32F-NEXT: ld a4, 24(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa3, 0(a4)
-; RV64ZVE32F-NEXT: j .LBB106_4
+; RV64ZVE32F-NEXT: andi a4, a3, 16
+; RV64ZVE32F-NEXT: beqz a4, .LBB106_5
; RV64ZVE32F-NEXT: .LBB106_14: # %cond.load10
; RV64ZVE32F-NEXT: ld a4, 32(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa4, 0(a4)
-; RV64ZVE32F-NEXT: j .LBB106_5
+; RV64ZVE32F-NEXT: andi a4, a3, 32
+; RV64ZVE32F-NEXT: beqz a4, .LBB106_6
; RV64ZVE32F-NEXT: .LBB106_15: # %cond.load13
; RV64ZVE32F-NEXT: ld a4, 40(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa5, 0(a4)
-; RV64ZVE32F-NEXT: j .LBB106_6
+; RV64ZVE32F-NEXT: andi a4, a3, 64
+; RV64ZVE32F-NEXT: beqz a4, .LBB106_7
; RV64ZVE32F-NEXT: .LBB106_16: # %cond.load16
; RV64ZVE32F-NEXT: ld a4, 48(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa6, 0(a4)
-; RV64ZVE32F-NEXT: j .LBB106_7
+; RV64ZVE32F-NEXT: andi a3, a3, -128
+; RV64ZVE32F-NEXT: bnez a3, .LBB106_8
+; RV64ZVE32F-NEXT: j .LBB106_9
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
@@ -13485,13 +13691,13 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 16
; RV64ZVE32F-NEXT: bltz a2, .LBB108_63
-; RV64ZVE32F-NEXT: .LBB108_31: # %else44
+; RV64ZVE32F-NEXT: # %bb.31: # %else44
; RV64ZVE32F-NEXT: slli a2, a1, 47
; RV64ZVE32F-NEXT: bltz a2, .LBB108_64
; RV64ZVE32F-NEXT: .LBB108_32: # %else47
; RV64ZVE32F-NEXT: slli a2, a1, 46
; RV64ZVE32F-NEXT: bgez a2, .LBB108_34
-; RV64ZVE32F-NEXT: # %bb.33: # %cond.load49
+; RV64ZVE32F-NEXT: .LBB108_33: # %cond.load49
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -13683,7 +13889,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 15
-; RV64ZVE32F-NEXT: j .LBB108_31
+; RV64ZVE32F-NEXT: slli a2, a1, 47
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_32
; RV64ZVE32F-NEXT: .LBB108_64: # %cond.load46
; RV64ZVE32F-NEXT: vsetivli zero, 17, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -13691,7 +13898,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 16
-; RV64ZVE32F-NEXT: j .LBB108_32
+; RV64ZVE32F-NEXT: slli a2, a1, 46
+; RV64ZVE32F-NEXT: bltz a2, .LBB108_33
+; RV64ZVE32F-NEXT: j .LBB108_34
%ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
%v = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %ptrs, i32 2, <32 x i1> %m, <32 x i8> %passthru)
ret <32 x i8> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 1d973d25c4a8b..e86fae6d501e5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -79,15 +79,16 @@ define void @mscatter_v2i8(<2 x i8> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB1_3
-; RV64ZVE32F-NEXT: .LBB1_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB1_4
-; RV64ZVE32F-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-NEXT: .LBB1_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB1_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
-; RV64ZVE32F-NEXT: j .LBB1_1
+; RV64ZVE32F-NEXT: andi a2, a2, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB1_2
; RV64ZVE32F-NEXT: .LBB1_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -126,15 +127,16 @@ define void @mscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x ptr> %ptrs, <2
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; RV64ZVE32F-NEXT: bnez a3, .LBB2_3
-; RV64ZVE32F-NEXT: .LBB2_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB2_4
-; RV64ZVE32F-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-NEXT: .LBB2_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB2_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
-; RV64ZVE32F-NEXT: j .LBB2_1
+; RV64ZVE32F-NEXT: andi a2, a2, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB2_2
; RV64ZVE32F-NEXT: .LBB2_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -182,15 +184,16 @@ define void @mscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x ptr> %ptrs, <2
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; RV64ZVE32F-NEXT: bnez a3, .LBB3_3
-; RV64ZVE32F-NEXT: .LBB3_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB3_4
-; RV64ZVE32F-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-NEXT: .LBB3_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB3_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
-; RV64ZVE32F-NEXT: j .LBB3_1
+; RV64ZVE32F-NEXT: andi a2, a2, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB3_2
; RV64ZVE32F-NEXT: .LBB3_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -244,15 +247,16 @@ define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x ptr> %ptrs, <2
; RV64ZVE32F-NEXT: andi a1, a0, 1
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: bnez a1, .LBB4_3
-; RV64ZVE32F-NEXT: .LBB4_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a0, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB4_4
-; RV64ZVE32F-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-NEXT: .LBB4_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB4_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a2)
-; RV64ZVE32F-NEXT: j .LBB4_1
+; RV64ZVE32F-NEXT: andi a0, a0, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB4_2
; RV64ZVE32F-NEXT: .LBB4_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -287,7 +291,7 @@ define void @mscatter_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB5_5
-; RV64ZVE32F-NEXT: .LBB5_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB5_6
; RV64ZVE32F-NEXT: .LBB5_2: # %else2
@@ -296,23 +300,26 @@ define void @mscatter_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB5_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB5_8
-; RV64ZVE32F-NEXT: # %bb.4: # %else6
+; RV64ZVE32F-NEXT: .LBB5_4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB5_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
-; RV64ZVE32F-NEXT: j .LBB5_1
+; RV64ZVE32F-NEXT: andi a0, a3, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB5_2
; RV64ZVE32F-NEXT: .LBB5_6: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse8.v v9, (a4)
-; RV64ZVE32F-NEXT: j .LBB5_2
+; RV64ZVE32F-NEXT: andi a0, a3, 4
+; RV64ZVE32F-NEXT: beqz a0, .LBB5_3
; RV64ZVE32F-NEXT: .LBB5_7: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
-; RV64ZVE32F-NEXT: j .LBB5_3
+; RV64ZVE32F-NEXT: andi a3, a3, 8
+; RV64ZVE32F-NEXT: beqz a3, .LBB5_4
; RV64ZVE32F-NEXT: .LBB5_8: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
@@ -390,7 +397,7 @@ define void @mscatter_v8i8(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB8_9
-; RV64ZVE32F-NEXT: .LBB8_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB8_10
; RV64ZVE32F-NEXT: .LBB8_2: # %else2
@@ -411,43 +418,50 @@ define void @mscatter_v8i8(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB8_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB8_16
-; RV64ZVE32F-NEXT: # %bb.8: # %else14
+; RV64ZVE32F-NEXT: .LBB8_8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB8_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
-; RV64ZVE32F-NEXT: j .LBB8_1
+; RV64ZVE32F-NEXT: andi a0, a4, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB8_2
; RV64ZVE32F-NEXT: .LBB8_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse8.v v9, (t0)
-; RV64ZVE32F-NEXT: j .LBB8_2
+; RV64ZVE32F-NEXT: andi a0, a4, 4
+; RV64ZVE32F-NEXT: beqz a0, .LBB8_3
; RV64ZVE32F-NEXT: .LBB8_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse8.v v9, (a7)
-; RV64ZVE32F-NEXT: j .LBB8_3
+; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: beqz a0, .LBB8_4
; RV64ZVE32F-NEXT: .LBB8_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse8.v v9, (a6)
-; RV64ZVE32F-NEXT: j .LBB8_4
+; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: beqz a0, .LBB8_5
; RV64ZVE32F-NEXT: .LBB8_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse8.v v9, (a5)
-; RV64ZVE32F-NEXT: j .LBB8_5
+; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: beqz a0, .LBB8_6
; RV64ZVE32F-NEXT: .LBB8_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vse8.v v9, (a3)
-; RV64ZVE32F-NEXT: j .LBB8_6
+; RV64ZVE32F-NEXT: andi a0, a4, 64
+; RV64ZVE32F-NEXT: beqz a0, .LBB8_7
; RV64ZVE32F-NEXT: .LBB8_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
-; RV64ZVE32F-NEXT: j .LBB8_7
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: beqz a0, .LBB8_8
; RV64ZVE32F-NEXT: .LBB8_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -545,10 +559,10 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB9_15
-; RV64ZVE32F-NEXT: .LBB9_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB9_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB9_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB9_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -556,7 +570,8 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB9_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB9_14
; RV64ZVE32F-NEXT: .LBB9_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -633,15 +648,16 @@ define void @mscatter_v2i16(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB11_3
-; RV64ZVE32F-NEXT: .LBB11_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB11_4
-; RV64ZVE32F-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-NEXT: .LBB11_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB11_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
-; RV64ZVE32F-NEXT: j .LBB11_1
+; RV64ZVE32F-NEXT: andi a2, a2, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB11_2
; RV64ZVE32F-NEXT: .LBB11_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -681,15 +697,16 @@ define void @mscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x ptr> %ptrs, <2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; RV64ZVE32F-NEXT: bnez a3, .LBB12_3
-; RV64ZVE32F-NEXT: .LBB12_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB12_4
-; RV64ZVE32F-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-NEXT: .LBB12_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB12_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
-; RV64ZVE32F-NEXT: j .LBB12_1
+; RV64ZVE32F-NEXT: andi a2, a2, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB12_2
; RV64ZVE32F-NEXT: .LBB12_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -741,15 +758,16 @@ define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: bnez a1, .LBB13_3
-; RV64ZVE32F-NEXT: .LBB13_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a0, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB13_4
-; RV64ZVE32F-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-NEXT: .LBB13_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB13_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a2)
-; RV64ZVE32F-NEXT: j .LBB13_1
+; RV64ZVE32F-NEXT: andi a0, a0, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB13_2
; RV64ZVE32F-NEXT: .LBB13_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -784,7 +802,7 @@ define void @mscatter_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB14_5
-; RV64ZVE32F-NEXT: .LBB14_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB14_6
; RV64ZVE32F-NEXT: .LBB14_2: # %else2
@@ -793,23 +811,26 @@ define void @mscatter_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB14_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB14_8
-; RV64ZVE32F-NEXT: # %bb.4: # %else6
+; RV64ZVE32F-NEXT: .LBB14_4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB14_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
-; RV64ZVE32F-NEXT: j .LBB14_1
+; RV64ZVE32F-NEXT: andi a0, a3, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB14_2
; RV64ZVE32F-NEXT: .LBB14_6: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse16.v v9, (a4)
-; RV64ZVE32F-NEXT: j .LBB14_2
+; RV64ZVE32F-NEXT: andi a0, a3, 4
+; RV64ZVE32F-NEXT: beqz a0, .LBB14_3
; RV64ZVE32F-NEXT: .LBB14_7: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: j .LBB14_3
+; RV64ZVE32F-NEXT: andi a3, a3, 8
+; RV64ZVE32F-NEXT: beqz a3, .LBB14_4
; RV64ZVE32F-NEXT: .LBB14_8: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
@@ -887,7 +908,7 @@ define void @mscatter_v8i16(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB17_9
-; RV64ZVE32F-NEXT: .LBB17_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB17_10
; RV64ZVE32F-NEXT: .LBB17_2: # %else2
@@ -908,43 +929,50 @@ define void @mscatter_v8i16(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB17_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB17_16
-; RV64ZVE32F-NEXT: # %bb.8: # %else14
+; RV64ZVE32F-NEXT: .LBB17_8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB17_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
-; RV64ZVE32F-NEXT: j .LBB17_1
+; RV64ZVE32F-NEXT: andi a0, a4, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB17_2
; RV64ZVE32F-NEXT: .LBB17_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse16.v v9, (t0)
-; RV64ZVE32F-NEXT: j .LBB17_2
+; RV64ZVE32F-NEXT: andi a0, a4, 4
+; RV64ZVE32F-NEXT: beqz a0, .LBB17_3
; RV64ZVE32F-NEXT: .LBB17_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse16.v v9, (a7)
-; RV64ZVE32F-NEXT: j .LBB17_3
+; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: beqz a0, .LBB17_4
; RV64ZVE32F-NEXT: .LBB17_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse16.v v9, (a6)
-; RV64ZVE32F-NEXT: j .LBB17_4
+; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: beqz a0, .LBB17_5
; RV64ZVE32F-NEXT: .LBB17_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a5)
-; RV64ZVE32F-NEXT: j .LBB17_5
+; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: beqz a0, .LBB17_6
; RV64ZVE32F-NEXT: .LBB17_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vse16.v v9, (a3)
-; RV64ZVE32F-NEXT: j .LBB17_6
+; RV64ZVE32F-NEXT: andi a0, a4, 64
+; RV64ZVE32F-NEXT: beqz a0, .LBB17_7
; RV64ZVE32F-NEXT: .LBB17_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: j .LBB17_7
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: beqz a0, .LBB17_8
; RV64ZVE32F-NEXT: .LBB17_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -1051,10 +1079,10 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB18_15
-; RV64ZVE32F-NEXT: .LBB18_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB18_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB18_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB18_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -1063,7 +1091,8 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB18_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB18_14
; RV64ZVE32F-NEXT: .LBB18_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -1176,10 +1205,10 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB19_15
-; RV64ZVE32F-NEXT: .LBB19_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB19_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB19_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB19_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -1188,7 +1217,8 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB19_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB19_14
; RV64ZVE32F-NEXT: .LBB19_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -1306,10 +1336,10 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB20_15
-; RV64ZVE32F-NEXT: .LBB20_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB20_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB20_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB20_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -1319,7 +1349,8 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB20_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB20_14
; RV64ZVE32F-NEXT: .LBB20_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -1431,10 +1462,10 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB21_15
-; RV64ZVE32F-NEXT: .LBB21_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB21_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB21_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB21_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -1443,7 +1474,8 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB21_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB21_14
; RV64ZVE32F-NEXT: .LBB21_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -1521,15 +1553,16 @@ define void @mscatter_v2i32(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB23_3
-; RV64ZVE32F-NEXT: .LBB23_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB23_4
-; RV64ZVE32F-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-NEXT: .LBB23_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB23_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
-; RV64ZVE32F-NEXT: j .LBB23_1
+; RV64ZVE32F-NEXT: andi a2, a2, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB23_2
; RV64ZVE32F-NEXT: .LBB23_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -1574,15 +1607,16 @@ define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x ptr> %ptrs, <2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32F-NEXT: bnez a4, .LBB24_3
-; RV64ZVE32F-NEXT: .LBB24_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a0, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB24_4
-; RV64ZVE32F-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-NEXT: .LBB24_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB24_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
-; RV64ZVE32F-NEXT: j .LBB24_1
+; RV64ZVE32F-NEXT: andi a0, a0, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB24_2
; RV64ZVE32F-NEXT: .LBB24_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -1617,7 +1651,7 @@ define void @mscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB25_5
-; RV64ZVE32F-NEXT: .LBB25_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB25_6
; RV64ZVE32F-NEXT: .LBB25_2: # %else2
@@ -1626,23 +1660,26 @@ define void @mscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB25_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB25_8
-; RV64ZVE32F-NEXT: # %bb.4: # %else6
+; RV64ZVE32F-NEXT: .LBB25_4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB25_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
-; RV64ZVE32F-NEXT: j .LBB25_1
+; RV64ZVE32F-NEXT: andi a0, a3, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB25_2
; RV64ZVE32F-NEXT: .LBB25_6: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse32.v v9, (a4)
-; RV64ZVE32F-NEXT: j .LBB25_2
+; RV64ZVE32F-NEXT: andi a0, a3, 4
+; RV64ZVE32F-NEXT: beqz a0, .LBB25_3
; RV64ZVE32F-NEXT: .LBB25_7: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse32.v v9, (a2)
-; RV64ZVE32F-NEXT: j .LBB25_3
+; RV64ZVE32F-NEXT: andi a3, a3, 8
+; RV64ZVE32F-NEXT: beqz a3, .LBB25_4
; RV64ZVE32F-NEXT: .LBB25_8: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
@@ -1720,7 +1757,7 @@ define void @mscatter_v8i32(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB28_9
-; RV64ZVE32F-NEXT: .LBB28_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB28_10
; RV64ZVE32F-NEXT: .LBB28_2: # %else2
@@ -1741,46 +1778,53 @@ define void @mscatter_v8i32(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB28_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB28_16
-; RV64ZVE32F-NEXT: # %bb.8: # %else14
+; RV64ZVE32F-NEXT: .LBB28_8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB28_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
-; RV64ZVE32F-NEXT: j .LBB28_1
+; RV64ZVE32F-NEXT: andi a0, a4, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB28_2
; RV64ZVE32F-NEXT: .LBB28_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vse32.v v10, (t0)
-; RV64ZVE32F-NEXT: j .LBB28_2
+; RV64ZVE32F-NEXT: andi a0, a4, 4
+; RV64ZVE32F-NEXT: beqz a0, .LBB28_3
; RV64ZVE32F-NEXT: .LBB28_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV64ZVE32F-NEXT: vse32.v v10, (a7)
-; RV64ZVE32F-NEXT: j .LBB28_3
+; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: beqz a0, .LBB28_4
; RV64ZVE32F-NEXT: .LBB28_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a6)
-; RV64ZVE32F-NEXT: j .LBB28_4
+; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: beqz a0, .LBB28_5
; RV64ZVE32F-NEXT: .LBB28_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a5)
-; RV64ZVE32F-NEXT: j .LBB28_5
+; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: beqz a0, .LBB28_6
; RV64ZVE32F-NEXT: .LBB28_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a3)
-; RV64ZVE32F-NEXT: j .LBB28_6
+; RV64ZVE32F-NEXT: andi a0, a4, 64
+; RV64ZVE32F-NEXT: beqz a0, .LBB28_7
; RV64ZVE32F-NEXT: .LBB28_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB28_7
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: beqz a0, .LBB28_8
; RV64ZVE32F-NEXT: .LBB28_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -1889,10 +1933,10 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB29_15
-; RV64ZVE32F-NEXT: .LBB29_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB29_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB29_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB29_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -1902,7 +1946,8 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB29_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB29_14
; RV64ZVE32F-NEXT: .LBB29_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2017,10 +2062,10 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB30_15
-; RV64ZVE32F-NEXT: .LBB30_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB30_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB30_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB30_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -2030,7 +2075,8 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB30_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB30_14
; RV64ZVE32F-NEXT: .LBB30_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2153,10 +2199,10 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB31_15
-; RV64ZVE32F-NEXT: .LBB31_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB31_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB31_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB31_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -2167,7 +2213,8 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB31_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB31_14
; RV64ZVE32F-NEXT: .LBB31_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2286,10 +2333,10 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB32_15
-; RV64ZVE32F-NEXT: .LBB32_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB32_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB32_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB32_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -2299,7 +2346,8 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB32_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB32_14
; RV64ZVE32F-NEXT: .LBB32_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2416,10 +2464,10 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB33_15
-; RV64ZVE32F-NEXT: .LBB33_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB33_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB33_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB33_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -2429,7 +2477,8 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB33_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB33_14
; RV64ZVE32F-NEXT: .LBB33_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2553,10 +2602,10 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB34_15
-; RV64ZVE32F-NEXT: .LBB34_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB34_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB34_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB34_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -2567,7 +2616,8 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB34_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB34_14
; RV64ZVE32F-NEXT: .LBB34_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2642,13 +2692,13 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB35_13
-; RV64ZVE32F-NEXT: .LBB35_7: # %else6
+; RV64ZVE32F-NEXT: # %bb.7: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB35_14
; RV64ZVE32F-NEXT: .LBB35_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB35_10
-; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
+; RV64ZVE32F-NEXT: .LBB35_9: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -2663,10 +2713,10 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB35_15
-; RV64ZVE32F-NEXT: .LBB35_11: # %else12
+; RV64ZVE32F-NEXT: # %bb.11: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB35_16
-; RV64ZVE32F-NEXT: # %bb.12: # %else14
+; RV64ZVE32F-NEXT: .LBB35_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB35_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -2676,7 +2726,8 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs,
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
-; RV64ZVE32F-NEXT: j .LBB35_7
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: beqz a2, .LBB35_8
; RV64ZVE32F-NEXT: .LBB35_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -2685,7 +2736,9 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB35_8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: bnez a2, .LBB35_9
+; RV64ZVE32F-NEXT: j .LBB35_10
; RV64ZVE32F-NEXT: .LBB35_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -2694,7 +2747,8 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB35_11
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB35_12
; RV64ZVE32F-NEXT: .LBB35_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -2775,10 +2829,10 @@ define void @mscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s a3, v0
; RV32ZVE32F-NEXT: andi a4, a3, 1
; RV32ZVE32F-NEXT: bnez a4, .LBB37_3
-; RV32ZVE32F-NEXT: .LBB37_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a3, a3, 2
; RV32ZVE32F-NEXT: bnez a3, .LBB37_4
-; RV32ZVE32F-NEXT: # %bb.2: # %else2
+; RV32ZVE32F-NEXT: .LBB37_2: # %else2
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB37_3: # %cond.store
; RV32ZVE32F-NEXT: lw a4, 0(a0)
@@ -2787,7 +2841,8 @@ define void @mscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s a5, v8
; RV32ZVE32F-NEXT: sw a4, 0(a5)
; RV32ZVE32F-NEXT: sw a0, 4(a5)
-; RV32ZVE32F-NEXT: j .LBB37_1
+; RV32ZVE32F-NEXT: andi a3, a3, 2
+; RV32ZVE32F-NEXT: beqz a3, .LBB37_2
; RV32ZVE32F-NEXT: .LBB37_4: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -2802,14 +2857,15 @@ define void @mscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi a5, a4, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB37_3
-; RV64ZVE32F-NEXT: .LBB37_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a4, a4, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB37_4
-; RV64ZVE32F-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-NEXT: .LBB37_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB37_3: # %cond.store
; RV64ZVE32F-NEXT: sd a0, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB37_1
+; RV64ZVE32F-NEXT: andi a4, a4, 2
+; RV64ZVE32F-NEXT: beqz a4, .LBB37_2
; RV64ZVE32F-NEXT: .LBB37_4: # %cond.store1
; RV64ZVE32F-NEXT: sd a1, 0(a3)
; RV64ZVE32F-NEXT: ret
@@ -2844,7 +2900,7 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s a5, v0
; RV32ZVE32F-NEXT: andi t0, a5, 1
; RV32ZVE32F-NEXT: bnez t0, .LBB38_5
-; RV32ZVE32F-NEXT: .LBB38_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a5, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB38_6
; RV32ZVE32F-NEXT: .LBB38_2: # %else2
@@ -2853,7 +2909,7 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32ZVE32F-NEXT: .LBB38_3: # %else4
; RV32ZVE32F-NEXT: andi a5, a5, 8
; RV32ZVE32F-NEXT: bnez a5, .LBB38_8
-; RV32ZVE32F-NEXT: # %bb.4: # %else6
+; RV32ZVE32F-NEXT: .LBB38_4: # %else6
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB38_5: # %cond.store
; RV32ZVE32F-NEXT: lw t0, 0(a0)
@@ -2862,21 +2918,24 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s t1, v8
; RV32ZVE32F-NEXT: sw t0, 0(t1)
; RV32ZVE32F-NEXT: sw a0, 4(t1)
-; RV32ZVE32F-NEXT: j .LBB38_1
+; RV32ZVE32F-NEXT: andi a0, a5, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB38_2
; RV32ZVE32F-NEXT: .LBB38_6: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v9
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB38_2
+; RV32ZVE32F-NEXT: andi a0, a5, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB38_3
; RV32ZVE32F-NEXT: .LBB38_7: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v9
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a4, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB38_3
+; RV32ZVE32F-NEXT: andi a5, a5, 8
+; RV32ZVE32F-NEXT: beqz a5, .LBB38_4
; RV32ZVE32F-NEXT: .LBB38_8: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
@@ -2897,7 +2956,7 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a7, v0
; RV64ZVE32F-NEXT: andi t1, a7, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB38_5
-; RV64ZVE32F-NEXT: .LBB38_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a7, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB38_6
; RV64ZVE32F-NEXT: .LBB38_2: # %else2
@@ -2906,19 +2965,22 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB38_3: # %else4
; RV64ZVE32F-NEXT: andi a0, a7, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB38_8
-; RV64ZVE32F-NEXT: # %bb.4: # %else6
+; RV64ZVE32F-NEXT: .LBB38_4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB38_5: # %cond.store
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: sd a0, 0(a1)
-; RV64ZVE32F-NEXT: j .LBB38_1
+; RV64ZVE32F-NEXT: andi a0, a7, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB38_2
; RV64ZVE32F-NEXT: .LBB38_6: # %cond.store1
; RV64ZVE32F-NEXT: sd t0, 0(a6)
-; RV64ZVE32F-NEXT: j .LBB38_2
+; RV64ZVE32F-NEXT: andi a0, a7, 4
+; RV64ZVE32F-NEXT: beqz a0, .LBB38_3
; RV64ZVE32F-NEXT: .LBB38_7: # %cond.store3
; RV64ZVE32F-NEXT: sd a5, 0(a4)
-; RV64ZVE32F-NEXT: j .LBB38_3
+; RV64ZVE32F-NEXT: andi a0, a7, 8
+; RV64ZVE32F-NEXT: beqz a0, .LBB38_4
; RV64ZVE32F-NEXT: .LBB38_8: # %cond.store5
; RV64ZVE32F-NEXT: sd a3, 0(a2)
; RV64ZVE32F-NEXT: ret
@@ -3038,7 +3100,7 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s a7, v0
; RV32ZVE32F-NEXT: andi s1, a7, 1
; RV32ZVE32F-NEXT: bnez s1, .LBB41_10
-; RV32ZVE32F-NEXT: .LBB41_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a7, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB41_11
; RV32ZVE32F-NEXT: .LBB41_2: # %else2
@@ -3059,7 +3121,7 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32ZVE32F-NEXT: .LBB41_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a7, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB41_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
+; RV32ZVE32F-NEXT: .LBB41_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -3083,49 +3145,57 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw s1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: j .LBB41_1
+; RV32ZVE32F-NEXT: andi a0, a7, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB41_2
; RV32ZVE32F-NEXT: .LBB41_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t6, 0(a0)
; RV32ZVE32F-NEXT: sw s0, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB41_2
+; RV32ZVE32F-NEXT: andi a0, a7, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB41_3
; RV32ZVE32F-NEXT: .LBB41_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t4, 0(a0)
; RV32ZVE32F-NEXT: sw t5, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB41_3
+; RV32ZVE32F-NEXT: andi a0, a7, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB41_4
; RV32ZVE32F-NEXT: .LBB41_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t2, 0(a0)
; RV32ZVE32F-NEXT: sw t3, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB41_4
+; RV32ZVE32F-NEXT: andi a0, a7, 16
+; RV32ZVE32F-NEXT: beqz a0, .LBB41_5
; RV32ZVE32F-NEXT: .LBB41_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t0, 0(a0)
; RV32ZVE32F-NEXT: sw t1, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB41_5
+; RV32ZVE32F-NEXT: andi a0, a7, 32
+; RV32ZVE32F-NEXT: beqz a0, .LBB41_6
; RV32ZVE32F-NEXT: .LBB41_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a5, 0(a0)
; RV32ZVE32F-NEXT: sw a6, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB41_6
+; RV32ZVE32F-NEXT: andi a0, a7, 64
+; RV32ZVE32F-NEXT: beqz a0, .LBB41_7
; RV32ZVE32F-NEXT: .LBB41_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a4, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB41_7
+; RV32ZVE32F-NEXT: andi a0, a7, -128
+; RV32ZVE32F-NEXT: bnez a0, .LBB41_8
+; RV32ZVE32F-NEXT: j .LBB41_9
;
; RV64ZVE32F-LABEL: mscatter_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -3156,7 +3226,7 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s t1, v0
; RV64ZVE32F-NEXT: andi s2, t1, 1
; RV64ZVE32F-NEXT: bnez s2, .LBB41_10
-; RV64ZVE32F-NEXT: .LBB41_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, t1, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB41_11
; RV64ZVE32F-NEXT: .LBB41_2: # %else2
@@ -3177,7 +3247,7 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB41_7: # %else12
; RV64ZVE32F-NEXT: andi a0, t1, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB41_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store13
+; RV64ZVE32F-NEXT: .LBB41_8: # %cond.store13
; RV64ZVE32F-NEXT: sd a5, 0(a2)
; RV64ZVE32F-NEXT: .LBB41_9: # %else14
; RV64ZVE32F-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
@@ -3194,25 +3264,33 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: sd a0, 0(a1)
-; RV64ZVE32F-NEXT: j .LBB41_1
+; RV64ZVE32F-NEXT: andi a0, t1, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB41_2
; RV64ZVE32F-NEXT: .LBB41_11: # %cond.store1
; RV64ZVE32F-NEXT: sd s1, 0(t5)
-; RV64ZVE32F-NEXT: j .LBB41_2
+; RV64ZVE32F-NEXT: andi a0, t1, 4
+; RV64ZVE32F-NEXT: beqz a0, .LBB41_3
; RV64ZVE32F-NEXT: .LBB41_12: # %cond.store3
; RV64ZVE32F-NEXT: sd s0, 0(t3)
-; RV64ZVE32F-NEXT: j .LBB41_3
+; RV64ZVE32F-NEXT: andi a0, t1, 8
+; RV64ZVE32F-NEXT: beqz a0, .LBB41_4
; RV64ZVE32F-NEXT: .LBB41_13: # %cond.store5
; RV64ZVE32F-NEXT: sd t6, 0(t2)
-; RV64ZVE32F-NEXT: j .LBB41_4
+; RV64ZVE32F-NEXT: andi a0, t1, 16
+; RV64ZVE32F-NEXT: beqz a0, .LBB41_5
; RV64ZVE32F-NEXT: .LBB41_14: # %cond.store7
; RV64ZVE32F-NEXT: sd t4, 0(t0)
-; RV64ZVE32F-NEXT: j .LBB41_5
+; RV64ZVE32F-NEXT: andi a0, t1, 32
+; RV64ZVE32F-NEXT: beqz a0, .LBB41_6
; RV64ZVE32F-NEXT: .LBB41_15: # %cond.store9
; RV64ZVE32F-NEXT: sd a7, 0(a4)
-; RV64ZVE32F-NEXT: j .LBB41_6
+; RV64ZVE32F-NEXT: andi a0, t1, 64
+; RV64ZVE32F-NEXT: beqz a0, .LBB41_7
; RV64ZVE32F-NEXT: .LBB41_16: # %cond.store11
; RV64ZVE32F-NEXT: sd a6, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB41_7
+; RV64ZVE32F-NEXT: andi a0, t1, -128
+; RV64ZVE32F-NEXT: bnez a0, .LBB41_8
+; RV64ZVE32F-NEXT: j .LBB41_9
call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
}
@@ -3269,7 +3347,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB42_10
-; RV32ZVE32F-NEXT: .LBB42_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB42_11
; RV32ZVE32F-NEXT: .LBB42_2: # %else2
@@ -3290,7 +3368,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV32ZVE32F-NEXT: .LBB42_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB42_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
+; RV32ZVE32F-NEXT: .LBB42_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -3313,49 +3391,57 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: j .LBB42_1
+; RV32ZVE32F-NEXT: andi a0, t0, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB42_2
; RV32ZVE32F-NEXT: .LBB42_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB42_2
+; RV32ZVE32F-NEXT: andi a0, t0, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB42_3
; RV32ZVE32F-NEXT: .LBB42_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB42_3
+; RV32ZVE32F-NEXT: andi a0, t0, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB42_4
; RV32ZVE32F-NEXT: .LBB42_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB42_4
+; RV32ZVE32F-NEXT: andi a0, t0, 16
+; RV32ZVE32F-NEXT: beqz a0, .LBB42_5
; RV32ZVE32F-NEXT: .LBB42_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB42_5
+; RV32ZVE32F-NEXT: andi a0, t0, 32
+; RV32ZVE32F-NEXT: beqz a0, .LBB42_6
; RV32ZVE32F-NEXT: .LBB42_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB42_6
+; RV32ZVE32F-NEXT: andi a0, t0, 64
+; RV32ZVE32F-NEXT: beqz a0, .LBB42_7
; RV32ZVE32F-NEXT: .LBB42_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB42_7
+; RV32ZVE32F-NEXT: andi a0, t0, -128
+; RV32ZVE32F-NEXT: bnez a0, .LBB42_8
+; RV32ZVE32F-NEXT: j .LBB42_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -3430,17 +3516,18 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB42_15
-; RV64ZVE32F-NEXT: .LBB42_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB42_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB42_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB42_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB42_13
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: beqz a0, .LBB42_14
; RV64ZVE32F-NEXT: .LBB42_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -3505,7 +3592,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB43_10
-; RV32ZVE32F-NEXT: .LBB43_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB43_11
; RV32ZVE32F-NEXT: .LBB43_2: # %else2
@@ -3526,7 +3613,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: .LBB43_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB43_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
+; RV32ZVE32F-NEXT: .LBB43_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -3549,49 +3636,57 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: j .LBB43_1
+; RV32ZVE32F-NEXT: andi a0, t0, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB43_2
; RV32ZVE32F-NEXT: .LBB43_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB43_2
+; RV32ZVE32F-NEXT: andi a0, t0, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB43_3
; RV32ZVE32F-NEXT: .LBB43_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB43_3
+; RV32ZVE32F-NEXT: andi a0, t0, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB43_4
; RV32ZVE32F-NEXT: .LBB43_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB43_4
+; RV32ZVE32F-NEXT: andi a0, t0, 16
+; RV32ZVE32F-NEXT: beqz a0, .LBB43_5
; RV32ZVE32F-NEXT: .LBB43_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB43_5
+; RV32ZVE32F-NEXT: andi a0, t0, 32
+; RV32ZVE32F-NEXT: beqz a0, .LBB43_6
; RV32ZVE32F-NEXT: .LBB43_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB43_6
+; RV32ZVE32F-NEXT: andi a0, t0, 64
+; RV32ZVE32F-NEXT: beqz a0, .LBB43_7
; RV32ZVE32F-NEXT: .LBB43_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB43_7
+; RV32ZVE32F-NEXT: andi a0, t0, -128
+; RV32ZVE32F-NEXT: bnez a0, .LBB43_8
+; RV32ZVE32F-NEXT: j .LBB43_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -3666,17 +3761,18 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB43_15
-; RV64ZVE32F-NEXT: .LBB43_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB43_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB43_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB43_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB43_13
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: beqz a0, .LBB43_14
; RV64ZVE32F-NEXT: .LBB43_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -3743,7 +3839,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB44_10
-; RV32ZVE32F-NEXT: .LBB44_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB44_11
; RV32ZVE32F-NEXT: .LBB44_2: # %else2
@@ -3764,7 +3860,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: .LBB44_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB44_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
+; RV32ZVE32F-NEXT: .LBB44_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -3787,49 +3883,57 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: j .LBB44_1
+; RV32ZVE32F-NEXT: andi a0, t0, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB44_2
; RV32ZVE32F-NEXT: .LBB44_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB44_2
+; RV32ZVE32F-NEXT: andi a0, t0, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB44_3
; RV32ZVE32F-NEXT: .LBB44_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB44_3
+; RV32ZVE32F-NEXT: andi a0, t0, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB44_4
; RV32ZVE32F-NEXT: .LBB44_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB44_4
+; RV32ZVE32F-NEXT: andi a0, t0, 16
+; RV32ZVE32F-NEXT: beqz a0, .LBB44_5
; RV32ZVE32F-NEXT: .LBB44_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB44_5
+; RV32ZVE32F-NEXT: andi a0, t0, 32
+; RV32ZVE32F-NEXT: beqz a0, .LBB44_6
; RV32ZVE32F-NEXT: .LBB44_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB44_6
+; RV32ZVE32F-NEXT: andi a0, t0, 64
+; RV32ZVE32F-NEXT: beqz a0, .LBB44_7
; RV32ZVE32F-NEXT: .LBB44_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB44_7
+; RV32ZVE32F-NEXT: andi a0, t0, -128
+; RV32ZVE32F-NEXT: bnez a0, .LBB44_8
+; RV32ZVE32F-NEXT: j .LBB44_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -3910,10 +4014,10 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB44_15
-; RV64ZVE32F-NEXT: .LBB44_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB44_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB44_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB44_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -3921,7 +4025,8 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB44_13
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: beqz a0, .LBB44_14
; RV64ZVE32F-NEXT: .LBB44_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -3988,7 +4093,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccus.vx v10, s1, v8
; RV32ZVE32F-NEXT: bnez s2, .LBB45_10
-; RV32ZVE32F-NEXT: .LBB45_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB45_11
; RV32ZVE32F-NEXT: .LBB45_2: # %else2
@@ -4009,7 +4114,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV32ZVE32F-NEXT: .LBB45_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB45_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
+; RV32ZVE32F-NEXT: .LBB45_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4033,49 +4138,57 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV32ZVE32F-NEXT: vmv.x.s s2, v10
; RV32ZVE32F-NEXT: sw s1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: j .LBB45_1
+; RV32ZVE32F-NEXT: andi a0, a1, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB45_2
; RV32ZVE32F-NEXT: .LBB45_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t6, 0(a0)
; RV32ZVE32F-NEXT: sw s0, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB45_2
+; RV32ZVE32F-NEXT: andi a0, a1, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB45_3
; RV32ZVE32F-NEXT: .LBB45_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t4, 0(a0)
; RV32ZVE32F-NEXT: sw t5, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB45_3
+; RV32ZVE32F-NEXT: andi a0, a1, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB45_4
; RV32ZVE32F-NEXT: .LBB45_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t2, 0(a0)
; RV32ZVE32F-NEXT: sw t3, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB45_4
+; RV32ZVE32F-NEXT: andi a0, a1, 16
+; RV32ZVE32F-NEXT: beqz a0, .LBB45_5
; RV32ZVE32F-NEXT: .LBB45_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t0, 0(a0)
; RV32ZVE32F-NEXT: sw t1, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB45_5
+; RV32ZVE32F-NEXT: andi a0, a1, 32
+; RV32ZVE32F-NEXT: beqz a0, .LBB45_6
; RV32ZVE32F-NEXT: .LBB45_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB45_6
+; RV32ZVE32F-NEXT: andi a0, a1, 64
+; RV32ZVE32F-NEXT: beqz a0, .LBB45_7
; RV32ZVE32F-NEXT: .LBB45_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB45_7
+; RV32ZVE32F-NEXT: andi a0, a1, -128
+; RV32ZVE32F-NEXT: bnez a0, .LBB45_8
+; RV32ZVE32F-NEXT: j .LBB45_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -4151,17 +4264,18 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB45_15
-; RV64ZVE32F-NEXT: .LBB45_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB45_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB45_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB45_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB45_13
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: beqz a0, .LBB45_14
; RV64ZVE32F-NEXT: .LBB45_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4226,7 +4340,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccus.vx v10, s1, v8
; RV32ZVE32F-NEXT: bnez s2, .LBB46_10
-; RV32ZVE32F-NEXT: .LBB46_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB46_11
; RV32ZVE32F-NEXT: .LBB46_2: # %else2
@@ -4247,7 +4361,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: .LBB46_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB46_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
+; RV32ZVE32F-NEXT: .LBB46_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4271,49 +4385,57 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vmv.x.s s2, v10
; RV32ZVE32F-NEXT: sw s1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: j .LBB46_1
+; RV32ZVE32F-NEXT: andi a0, a1, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB46_2
; RV32ZVE32F-NEXT: .LBB46_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t6, 0(a0)
; RV32ZVE32F-NEXT: sw s0, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB46_2
+; RV32ZVE32F-NEXT: andi a0, a1, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB46_3
; RV32ZVE32F-NEXT: .LBB46_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t4, 0(a0)
; RV32ZVE32F-NEXT: sw t5, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB46_3
+; RV32ZVE32F-NEXT: andi a0, a1, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB46_4
; RV32ZVE32F-NEXT: .LBB46_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t2, 0(a0)
; RV32ZVE32F-NEXT: sw t3, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB46_4
+; RV32ZVE32F-NEXT: andi a0, a1, 16
+; RV32ZVE32F-NEXT: beqz a0, .LBB46_5
; RV32ZVE32F-NEXT: .LBB46_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t0, 0(a0)
; RV32ZVE32F-NEXT: sw t1, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB46_5
+; RV32ZVE32F-NEXT: andi a0, a1, 32
+; RV32ZVE32F-NEXT: beqz a0, .LBB46_6
; RV32ZVE32F-NEXT: .LBB46_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB46_6
+; RV32ZVE32F-NEXT: andi a0, a1, 64
+; RV32ZVE32F-NEXT: beqz a0, .LBB46_7
; RV32ZVE32F-NEXT: .LBB46_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB46_7
+; RV32ZVE32F-NEXT: andi a0, a1, -128
+; RV32ZVE32F-NEXT: bnez a0, .LBB46_8
+; RV32ZVE32F-NEXT: j .LBB46_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -4389,17 +4511,18 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB46_15
-; RV64ZVE32F-NEXT: .LBB46_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB46_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB46_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB46_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB46_13
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: beqz a0, .LBB46_14
; RV64ZVE32F-NEXT: .LBB46_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4466,7 +4589,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccu.vx v10, s1, v8
; RV32ZVE32F-NEXT: bnez s2, .LBB47_10
-; RV32ZVE32F-NEXT: .LBB47_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB47_11
; RV32ZVE32F-NEXT: .LBB47_2: # %else2
@@ -4487,7 +4610,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: .LBB47_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB47_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
+; RV32ZVE32F-NEXT: .LBB47_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4511,49 +4634,57 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vmv.x.s s2, v10
; RV32ZVE32F-NEXT: sw s1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: j .LBB47_1
+; RV32ZVE32F-NEXT: andi a0, a1, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB47_2
; RV32ZVE32F-NEXT: .LBB47_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t6, 0(a0)
; RV32ZVE32F-NEXT: sw s0, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB47_2
+; RV32ZVE32F-NEXT: andi a0, a1, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB47_3
; RV32ZVE32F-NEXT: .LBB47_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t4, 0(a0)
; RV32ZVE32F-NEXT: sw t5, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB47_3
+; RV32ZVE32F-NEXT: andi a0, a1, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB47_4
; RV32ZVE32F-NEXT: .LBB47_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t2, 0(a0)
; RV32ZVE32F-NEXT: sw t3, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB47_4
+; RV32ZVE32F-NEXT: andi a0, a1, 16
+; RV32ZVE32F-NEXT: beqz a0, .LBB47_5
; RV32ZVE32F-NEXT: .LBB47_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw t0, 0(a0)
; RV32ZVE32F-NEXT: sw t1, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB47_5
+; RV32ZVE32F-NEXT: andi a0, a1, 32
+; RV32ZVE32F-NEXT: beqz a0, .LBB47_6
; RV32ZVE32F-NEXT: .LBB47_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB47_6
+; RV32ZVE32F-NEXT: andi a0, a1, 64
+; RV32ZVE32F-NEXT: beqz a0, .LBB47_7
; RV32ZVE32F-NEXT: .LBB47_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB47_7
+; RV32ZVE32F-NEXT: andi a0, a1, -128
+; RV32ZVE32F-NEXT: bnez a0, .LBB47_8
+; RV32ZVE32F-NEXT: j .LBB47_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -4635,10 +4766,10 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB47_15
-; RV64ZVE32F-NEXT: .LBB47_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB47_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB47_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB47_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4646,7 +4777,8 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB47_13
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: beqz a0, .LBB47_14
; RV64ZVE32F-NEXT: .LBB47_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4712,7 +4844,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB48_10
-; RV32ZVE32F-NEXT: .LBB48_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB48_11
; RV32ZVE32F-NEXT: .LBB48_2: # %else2
@@ -4733,7 +4865,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV32ZVE32F-NEXT: .LBB48_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB48_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
+; RV32ZVE32F-NEXT: .LBB48_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4756,49 +4888,57 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: j .LBB48_1
+; RV32ZVE32F-NEXT: andi a0, t0, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB48_2
; RV32ZVE32F-NEXT: .LBB48_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB48_2
+; RV32ZVE32F-NEXT: andi a0, t0, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB48_3
; RV32ZVE32F-NEXT: .LBB48_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB48_3
+; RV32ZVE32F-NEXT: andi a0, t0, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB48_4
; RV32ZVE32F-NEXT: .LBB48_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB48_4
+; RV32ZVE32F-NEXT: andi a0, t0, 16
+; RV32ZVE32F-NEXT: beqz a0, .LBB48_5
; RV32ZVE32F-NEXT: .LBB48_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB48_5
+; RV32ZVE32F-NEXT: andi a0, t0, 32
+; RV32ZVE32F-NEXT: beqz a0, .LBB48_6
; RV32ZVE32F-NEXT: .LBB48_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB48_6
+; RV32ZVE32F-NEXT: andi a0, t0, 64
+; RV32ZVE32F-NEXT: beqz a0, .LBB48_7
; RV32ZVE32F-NEXT: .LBB48_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB48_7
+; RV32ZVE32F-NEXT: andi a0, t0, -128
+; RV32ZVE32F-NEXT: bnez a0, .LBB48_8
+; RV32ZVE32F-NEXT: j .LBB48_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -4845,13 +4985,13 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB48_13
-; RV64ZVE32F-NEXT: .LBB48_7: # %else6
+; RV64ZVE32F-NEXT: # %bb.7: # %else6
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB48_14
; RV64ZVE32F-NEXT: .LBB48_8: # %else8
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB48_10
-; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
+; RV64ZVE32F-NEXT: .LBB48_9: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
@@ -4863,10 +5003,10 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB48_15
-; RV64ZVE32F-NEXT: .LBB48_11: # %else12
+; RV64ZVE32F-NEXT: # %bb.11: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB48_16
-; RV64ZVE32F-NEXT: # %bb.12: # %else14
+; RV64ZVE32F-NEXT: .LBB48_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB48_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -4875,20 +5015,24 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB48_7
+; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: beqz a0, .LBB48_8
; RV64ZVE32F-NEXT: .LBB48_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB48_8
+; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: bnez a0, .LBB48_9
+; RV64ZVE32F-NEXT: j .LBB48_10
; RV64ZVE32F-NEXT: .LBB48_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB48_11
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: beqz a0, .LBB48_12
; RV64ZVE32F-NEXT: .LBB48_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4952,7 +5096,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB49_10
-; RV32ZVE32F-NEXT: .LBB49_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB49_11
; RV32ZVE32F-NEXT: .LBB49_2: # %else2
@@ -4973,7 +5117,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: .LBB49_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB49_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
+; RV32ZVE32F-NEXT: .LBB49_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -4996,49 +5140,57 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: j .LBB49_1
+; RV32ZVE32F-NEXT: andi a0, t0, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB49_2
; RV32ZVE32F-NEXT: .LBB49_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB49_2
+; RV32ZVE32F-NEXT: andi a0, t0, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB49_3
; RV32ZVE32F-NEXT: .LBB49_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB49_3
+; RV32ZVE32F-NEXT: andi a0, t0, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB49_4
; RV32ZVE32F-NEXT: .LBB49_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB49_4
+; RV32ZVE32F-NEXT: andi a0, t0, 16
+; RV32ZVE32F-NEXT: beqz a0, .LBB49_5
; RV32ZVE32F-NEXT: .LBB49_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB49_5
+; RV32ZVE32F-NEXT: andi a0, t0, 32
+; RV32ZVE32F-NEXT: beqz a0, .LBB49_6
; RV32ZVE32F-NEXT: .LBB49_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB49_6
+; RV32ZVE32F-NEXT: andi a0, t0, 64
+; RV32ZVE32F-NEXT: beqz a0, .LBB49_7
; RV32ZVE32F-NEXT: .LBB49_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB49_7
+; RV32ZVE32F-NEXT: andi a0, t0, -128
+; RV32ZVE32F-NEXT: bnez a0, .LBB49_8
+; RV32ZVE32F-NEXT: j .LBB49_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -5085,13 +5237,13 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB49_13
-; RV64ZVE32F-NEXT: .LBB49_7: # %else6
+; RV64ZVE32F-NEXT: # %bb.7: # %else6
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB49_14
; RV64ZVE32F-NEXT: .LBB49_8: # %else8
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB49_10
-; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
+; RV64ZVE32F-NEXT: .LBB49_9: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
@@ -5103,10 +5255,10 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB49_15
-; RV64ZVE32F-NEXT: .LBB49_11: # %else12
+; RV64ZVE32F-NEXT: # %bb.11: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB49_16
-; RV64ZVE32F-NEXT: # %bb.12: # %else14
+; RV64ZVE32F-NEXT: .LBB49_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB49_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -5115,20 +5267,24 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB49_7
+; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: beqz a0, .LBB49_8
; RV64ZVE32F-NEXT: .LBB49_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB49_8
+; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: bnez a0, .LBB49_9
+; RV64ZVE32F-NEXT: j .LBB49_10
; RV64ZVE32F-NEXT: .LBB49_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB49_11
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: beqz a0, .LBB49_12
; RV64ZVE32F-NEXT: .LBB49_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -5193,7 +5349,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB50_10
-; RV32ZVE32F-NEXT: .LBB50_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB50_11
; RV32ZVE32F-NEXT: .LBB50_2: # %else2
@@ -5214,7 +5370,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: .LBB50_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB50_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
+; RV32ZVE32F-NEXT: .LBB50_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -5237,49 +5393,57 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: j .LBB50_1
+; RV32ZVE32F-NEXT: andi a0, t0, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB50_2
; RV32ZVE32F-NEXT: .LBB50_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB50_2
+; RV32ZVE32F-NEXT: andi a0, t0, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB50_3
; RV32ZVE32F-NEXT: .LBB50_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB50_3
+; RV32ZVE32F-NEXT: andi a0, t0, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB50_4
; RV32ZVE32F-NEXT: .LBB50_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB50_4
+; RV32ZVE32F-NEXT: andi a0, t0, 16
+; RV32ZVE32F-NEXT: beqz a0, .LBB50_5
; RV32ZVE32F-NEXT: .LBB50_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB50_5
+; RV32ZVE32F-NEXT: andi a0, t0, 32
+; RV32ZVE32F-NEXT: beqz a0, .LBB50_6
; RV32ZVE32F-NEXT: .LBB50_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB50_6
+; RV32ZVE32F-NEXT: andi a0, t0, 64
+; RV32ZVE32F-NEXT: beqz a0, .LBB50_7
; RV32ZVE32F-NEXT: .LBB50_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB50_7
+; RV32ZVE32F-NEXT: andi a0, t0, -128
+; RV32ZVE32F-NEXT: bnez a0, .LBB50_8
+; RV32ZVE32F-NEXT: j .LBB50_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -5329,13 +5493,13 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB50_13
-; RV64ZVE32F-NEXT: .LBB50_7: # %else6
+; RV64ZVE32F-NEXT: # %bb.7: # %else6
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB50_14
; RV64ZVE32F-NEXT: .LBB50_8: # %else8
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB50_10
-; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
+; RV64ZVE32F-NEXT: .LBB50_9: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
@@ -5348,10 +5512,10 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB50_15
-; RV64ZVE32F-NEXT: .LBB50_11: # %else12
+; RV64ZVE32F-NEXT: # %bb.11: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB50_16
-; RV64ZVE32F-NEXT: # %bb.12: # %else14
+; RV64ZVE32F-NEXT: .LBB50_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB50_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -5361,7 +5525,8 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB50_7
+; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: beqz a0, .LBB50_8
; RV64ZVE32F-NEXT: .LBB50_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -5369,14 +5534,17 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB50_8
+; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: bnez a0, .LBB50_9
+; RV64ZVE32F-NEXT: j .LBB50_10
; RV64ZVE32F-NEXT: .LBB50_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB50_11
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: beqz a0, .LBB50_12
; RV64ZVE32F-NEXT: .LBB50_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
@@ -5471,7 +5639,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV32ZVE32F-NEXT: andi s2, a2, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB51_10
-; RV32ZVE32F-NEXT: .LBB51_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a2, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB51_11
; RV32ZVE32F-NEXT: .LBB51_2: # %else2
@@ -5492,7 +5660,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV32ZVE32F-NEXT: .LBB51_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a2, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB51_9
-; RV32ZVE32F-NEXT: # %bb.8: # %cond.store13
+; RV32ZVE32F-NEXT: .LBB51_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -5529,49 +5697,57 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
-; RV32ZVE32F-NEXT: j .LBB51_1
+; RV32ZVE32F-NEXT: andi a0, a2, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB51_2
; RV32ZVE32F-NEXT: .LBB51_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB51_2
+; RV32ZVE32F-NEXT: andi a0, a2, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB51_3
; RV32ZVE32F-NEXT: .LBB51_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB51_3
+; RV32ZVE32F-NEXT: andi a0, a2, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB51_4
; RV32ZVE32F-NEXT: .LBB51_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB51_4
+; RV32ZVE32F-NEXT: andi a0, a2, 16
+; RV32ZVE32F-NEXT: beqz a0, .LBB51_5
; RV32ZVE32F-NEXT: .LBB51_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB51_5
+; RV32ZVE32F-NEXT: andi a0, a2, 32
+; RV32ZVE32F-NEXT: beqz a0, .LBB51_6
; RV32ZVE32F-NEXT: .LBB51_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a7, 0(a0)
; RV32ZVE32F-NEXT: sw t0, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB51_6
+; RV32ZVE32F-NEXT: andi a0, a2, 64
+; RV32ZVE32F-NEXT: beqz a0, .LBB51_7
; RV32ZVE32F-NEXT: .LBB51_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a5, 0(a0)
; RV32ZVE32F-NEXT: sw a6, 4(a0)
-; RV32ZVE32F-NEXT: j .LBB51_7
+; RV32ZVE32F-NEXT: andi a0, a2, -128
+; RV32ZVE32F-NEXT: bnez a0, .LBB51_8
+; RV32ZVE32F-NEXT: j .LBB51_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i64:
; RV64ZVE32F: # %bb.0:
@@ -5604,7 +5780,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV64ZVE32F-NEXT: vmv.x.s a7, v0
; RV64ZVE32F-NEXT: andi s3, a7, 1
; RV64ZVE32F-NEXT: bnez s3, .LBB51_10
-; RV64ZVE32F-NEXT: .LBB51_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a7, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB51_11
; RV64ZVE32F-NEXT: .LBB51_2: # %else2
@@ -5625,7 +5801,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV64ZVE32F-NEXT: .LBB51_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a7, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB51_9
-; RV64ZVE32F-NEXT: # %bb.8: # %cond.store13
+; RV64ZVE32F-NEXT: .LBB51_8: # %cond.store13
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a1, a1, a6
; RV64ZVE32F-NEXT: sd a3, 0(a1)
@@ -5648,37 +5824,45 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a1, a2
; RV64ZVE32F-NEXT: sd a0, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB51_1
+; RV64ZVE32F-NEXT: andi a0, a7, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB51_2
; RV64ZVE32F-NEXT: .LBB51_11: # %cond.store1
; RV64ZVE32F-NEXT: slli s2, s2, 3
; RV64ZVE32F-NEXT: add s2, a1, s2
; RV64ZVE32F-NEXT: sd s0, 0(s2)
-; RV64ZVE32F-NEXT: j .LBB51_2
+; RV64ZVE32F-NEXT: andi a0, a7, 4
+; RV64ZVE32F-NEXT: beqz a0, .LBB51_3
; RV64ZVE32F-NEXT: .LBB51_12: # %cond.store3
; RV64ZVE32F-NEXT: slli s1, s1, 3
; RV64ZVE32F-NEXT: add s1, a1, s1
; RV64ZVE32F-NEXT: sd t5, 0(s1)
-; RV64ZVE32F-NEXT: j .LBB51_3
+; RV64ZVE32F-NEXT: andi a0, a7, 8
+; RV64ZVE32F-NEXT: beqz a0, .LBB51_4
; RV64ZVE32F-NEXT: .LBB51_13: # %cond.store5
; RV64ZVE32F-NEXT: slli t6, t6, 3
; RV64ZVE32F-NEXT: add t6, a1, t6
; RV64ZVE32F-NEXT: sd t3, 0(t6)
-; RV64ZVE32F-NEXT: j .LBB51_4
+; RV64ZVE32F-NEXT: andi a0, a7, 16
+; RV64ZVE32F-NEXT: beqz a0, .LBB51_5
; RV64ZVE32F-NEXT: .LBB51_14: # %cond.store7
; RV64ZVE32F-NEXT: slli t4, t4, 3
; RV64ZVE32F-NEXT: add t4, a1, t4
; RV64ZVE32F-NEXT: sd t1, 0(t4)
-; RV64ZVE32F-NEXT: j .LBB51_5
+; RV64ZVE32F-NEXT: andi a0, a7, 32
+; RV64ZVE32F-NEXT: beqz a0, .LBB51_6
; RV64ZVE32F-NEXT: .LBB51_15: # %cond.store9
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a5, 0(t2)
-; RV64ZVE32F-NEXT: j .LBB51_6
+; RV64ZVE32F-NEXT: andi a0, a7, 64
+; RV64ZVE32F-NEXT: beqz a0, .LBB51_7
; RV64ZVE32F-NEXT: .LBB51_16: # %cond.store11
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: sd a4, 0(t0)
-; RV64ZVE32F-NEXT: j .LBB51_7
+; RV64ZVE32F-NEXT: andi a0, a7, -128
+; RV64ZVE32F-NEXT: bnez a0, .LBB51_8
+; RV64ZVE32F-NEXT: j .LBB51_9
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
@@ -5748,17 +5932,18 @@ define void @mscatter_v2bf16(<2 x bfloat> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB53_3
-; RV64ZVE32F-NEXT: .LBB53_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB53_4
-; RV64ZVE32F-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-NEXT: .LBB53_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB53_3: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB53_1
+; RV64ZVE32F-NEXT: andi a2, a2, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB53_2
; RV64ZVE32F-NEXT: .LBB53_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -5794,7 +5979,7 @@ define void @mscatter_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB54_5
-; RV64ZVE32F-NEXT: .LBB54_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB54_6
; RV64ZVE32F-NEXT: .LBB54_2: # %else2
@@ -5803,7 +5988,7 @@ define void @mscatter_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB54_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB54_8
-; RV64ZVE32F-NEXT: # %bb.4: # %else6
+; RV64ZVE32F-NEXT: .LBB54_4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB54_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
@@ -5811,21 +5996,24 @@ define void @mscatter_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a5, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a5
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB54_1
+; RV64ZVE32F-NEXT: andi a0, a3, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB54_2
; RV64ZVE32F-NEXT: .LBB54_6: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a4)
-; RV64ZVE32F-NEXT: j .LBB54_2
+; RV64ZVE32F-NEXT: andi a0, a3, 4
+; RV64ZVE32F-NEXT: beqz a0, .LBB54_3
; RV64ZVE32F-NEXT: .LBB54_7: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB54_3
+; RV64ZVE32F-NEXT: andi a3, a3, 8
+; RV64ZVE32F-NEXT: beqz a3, .LBB54_4
; RV64ZVE32F-NEXT: .LBB54_8: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
@@ -5913,7 +6101,7 @@ define void @mscatter_v8bf16(<8 x bfloat> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB57_9
-; RV64ZVE32F-NEXT: .LBB57_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB57_10
; RV64ZVE32F-NEXT: .LBB57_2: # %else2
@@ -5934,7 +6122,7 @@ define void @mscatter_v8bf16(<8 x bfloat> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB57_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB57_16
-; RV64ZVE32F-NEXT: # %bb.8: # %else14
+; RV64ZVE32F-NEXT: .LBB57_8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB57_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
@@ -5942,49 +6130,56 @@ define void @mscatter_v8bf16(<8 x bfloat> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, t1
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB57_1
+; RV64ZVE32F-NEXT: andi a0, a4, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB57_2
; RV64ZVE32F-NEXT: .LBB57_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(t0)
-; RV64ZVE32F-NEXT: j .LBB57_2
+; RV64ZVE32F-NEXT: andi a0, a4, 4
+; RV64ZVE32F-NEXT: beqz a0, .LBB57_3
; RV64ZVE32F-NEXT: .LBB57_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a7)
-; RV64ZVE32F-NEXT: j .LBB57_3
+; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: beqz a0, .LBB57_4
; RV64ZVE32F-NEXT: .LBB57_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a6)
-; RV64ZVE32F-NEXT: j .LBB57_4
+; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: beqz a0, .LBB57_5
; RV64ZVE32F-NEXT: .LBB57_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a5)
-; RV64ZVE32F-NEXT: j .LBB57_5
+; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: beqz a0, .LBB57_6
; RV64ZVE32F-NEXT: .LBB57_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB57_6
+; RV64ZVE32F-NEXT: andi a0, a4, 64
+; RV64ZVE32F-NEXT: beqz a0, .LBB57_7
; RV64ZVE32F-NEXT: .LBB57_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB57_7
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: beqz a0, .LBB57_8
; RV64ZVE32F-NEXT: .LBB57_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -6104,10 +6299,10 @@ define void @mscatter_baseidx_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8>
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB58_15
-; RV64ZVE32F-NEXT: .LBB58_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB58_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB58_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB58_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -6118,7 +6313,8 @@ define void @mscatter_baseidx_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8>
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB58_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB58_14
; RV64ZVE32F-NEXT: .LBB58_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -6246,10 +6442,10 @@ define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB59_15
-; RV64ZVE32F-NEXT: .LBB59_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB59_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB59_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB59_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -6260,7 +6456,8 @@ define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB59_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB59_14
; RV64ZVE32F-NEXT: .LBB59_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -6393,10 +6590,10 @@ define void @mscatter_baseidx_zext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB60_15
-; RV64ZVE32F-NEXT: .LBB60_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB60_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB60_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB60_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -6408,7 +6605,8 @@ define void @mscatter_baseidx_zext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB60_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB60_14
; RV64ZVE32F-NEXT: .LBB60_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -6535,10 +6733,10 @@ define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %id
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB61_15
-; RV64ZVE32F-NEXT: .LBB61_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB61_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB61_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB61_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -6549,7 +6747,8 @@ define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %id
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB61_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB61_14
; RV64ZVE32F-NEXT: .LBB61_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -6642,15 +6841,16 @@ define void @mscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-ZVFH-NEXT: andi a3, a2, 1
; RV64ZVE32F-ZVFH-NEXT: bnez a3, .LBB63_3
-; RV64ZVE32F-ZVFH-NEXT: .LBB63_1: # %else
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB63_4
-; RV64ZVE32F-ZVFH-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-ZVFH-NEXT: .LBB63_2: # %else2
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB63_3: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB63_1
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB63_2
; RV64ZVE32F-ZVFH-NEXT: .LBB63_4: # %cond.store1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
@@ -6663,17 +6863,18 @@ define void @mscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a3, .LBB63_3
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_1: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB63_4
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_2: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_3: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB63_1
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB63_2
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_4: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
@@ -6709,7 +6910,7 @@ define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-ZVFH-NEXT: andi a5, a3, 1
; RV64ZVE32F-ZVFH-NEXT: bnez a5, .LBB64_5
-; RV64ZVE32F-ZVFH-NEXT: .LBB64_1: # %else
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB64_6
; RV64ZVE32F-ZVFH-NEXT: .LBB64_2: # %else2
@@ -6718,23 +6919,26 @@ define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-ZVFH-NEXT: .LBB64_3: # %else4
; RV64ZVE32F-ZVFH-NEXT: andi a3, a3, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a3, .LBB64_8
-; RV64ZVE32F-ZVFH-NEXT: # %bb.4: # %else6
+; RV64ZVE32F-ZVFH-NEXT: .LBB64_4: # %else6
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB64_5: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: ld a0, 0(a0)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB64_1
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB64_2
; RV64ZVE32F-ZVFH-NEXT: .LBB64_6: # %cond.store1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a4)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB64_2
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 4
+; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB64_3
; RV64ZVE32F-ZVFH-NEXT: .LBB64_7: # %cond.store3
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB64_3
+; RV64ZVE32F-ZVFH-NEXT: andi a3, a3, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a3, .LBB64_4
; RV64ZVE32F-ZVFH-NEXT: .LBB64_8: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 3
@@ -6750,7 +6954,7 @@ define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a5, a3, 1
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a5, .LBB64_5
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_1: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB64_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_2: # %else2
@@ -6759,7 +6963,7 @@ define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_3: # %else4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a3, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a3, .LBB64_8
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.4: # %else6
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_4: # %else6
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_5: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 0(a0)
@@ -6767,21 +6971,24 @@ define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a5, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a5
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB64_1
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB64_2
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_6: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a4)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB64_2
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB64_3
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_7: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB64_3
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a3, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a3, .LBB64_4
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_8: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
@@ -6885,7 +7092,7 @@ define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-ZVFH-NEXT: andi t1, a4, 1
; RV64ZVE32F-ZVFH-NEXT: bnez t1, .LBB67_9
-; RV64ZVE32F-ZVFH-NEXT: .LBB67_1: # %else
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_10
; RV64ZVE32F-ZVFH-NEXT: .LBB67_2: # %else2
@@ -6906,43 +7113,50 @@ define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-ZVFH-NEXT: .LBB67_7: # %else12
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_16
-; RV64ZVE32F-ZVFH-NEXT: # %bb.8: # %else14
+; RV64ZVE32F-ZVFH-NEXT: .LBB67_8: # %else14
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB67_9: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: ld a0, 0(a0)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB67_1
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_2
; RV64ZVE32F-ZVFH-NEXT: .LBB67_10: # %cond.store1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (t0)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB67_2
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 4
+; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_3
; RV64ZVE32F-ZVFH-NEXT: .LBB67_11: # %cond.store3
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a7)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB67_3
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_4
; RV64ZVE32F-ZVFH-NEXT: .LBB67_12: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a6)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB67_4
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 16
+; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_5
; RV64ZVE32F-ZVFH-NEXT: .LBB67_13: # %cond.store7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a5)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB67_5
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 32
+; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_6
; RV64ZVE32F-ZVFH-NEXT: .LBB67_14: # %cond.store9
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a3)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB67_6
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 64
+; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_7
; RV64ZVE32F-ZVFH-NEXT: .LBB67_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB67_7
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, -128
+; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_8
; RV64ZVE32F-ZVFH-NEXT: .LBB67_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7
@@ -6962,7 +7176,7 @@ define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi t1, a4, 1
; RV64ZVE32F-ZVFHMIN-NEXT: bnez t1, .LBB67_9
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_1: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_10
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_2: # %else2
@@ -6983,7 +7197,7 @@ define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_7: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_16
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.8: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_8: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_9: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 0(a0)
@@ -6991,49 +7205,56 @@ define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, t1
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB67_1
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_2
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_10: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(t0)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB67_2
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_3
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_11: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a7)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB67_3
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_4
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_12: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a6)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB67_4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_5
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_13: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a5)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB67_5
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_14: # %cond.store9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a3)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB67_6
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 64
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB67_7
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_8
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
@@ -7142,10 +7363,10 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_15
-; RV64ZVE32F-ZVFH-NEXT: .LBB68_13: # %else12
+; RV64ZVE32F-ZVFH-NEXT: # %bb.13: # %else12
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB68_16
-; RV64ZVE32F-ZVFH-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_14: # %else14
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB68_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
@@ -7154,7 +7375,8 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB68_13
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB68_14
; RV64ZVE32F-ZVFH-NEXT: .LBB68_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
@@ -7255,10 +7477,10 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_15
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_13: # %else12
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.13: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB68_16
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_14: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
@@ -7269,7 +7491,8 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB68_13
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB68_14
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
@@ -7386,10 +7609,10 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_15
-; RV64ZVE32F-ZVFH-NEXT: .LBB69_13: # %else12
+; RV64ZVE32F-ZVFH-NEXT: # %bb.13: # %else12
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB69_16
-; RV64ZVE32F-ZVFH-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_14: # %else14
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB69_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
@@ -7398,7 +7621,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB69_13
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB69_14
; RV64ZVE32F-ZVFH-NEXT: .LBB69_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
@@ -7499,10 +7723,10 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_15
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_13: # %else12
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.13: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB69_16
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_14: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
@@ -7513,7 +7737,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB69_13
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB69_14
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
@@ -7635,10 +7860,10 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_15
-; RV64ZVE32F-ZVFH-NEXT: .LBB70_13: # %else12
+; RV64ZVE32F-ZVFH-NEXT: # %bb.13: # %else12
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB70_16
-; RV64ZVE32F-ZVFH-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_14: # %else14
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB70_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
@@ -7648,7 +7873,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB70_13
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB70_14
; RV64ZVE32F-ZVFH-NEXT: .LBB70_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
@@ -7756,10 +7982,10 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_15
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_13: # %else12
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.13: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_16
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_14: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
@@ -7771,7 +7997,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_13
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB70_14
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
@@ -7887,10 +8114,10 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_15
-; RV64ZVE32F-ZVFH-NEXT: .LBB71_13: # %else12
+; RV64ZVE32F-ZVFH-NEXT: # %bb.13: # %else12
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB71_16
-; RV64ZVE32F-ZVFH-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_14: # %else14
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB71_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
@@ -7899,7 +8126,8 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
-; RV64ZVE32F-ZVFH-NEXT: j .LBB71_13
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB71_14
; RV64ZVE32F-ZVFH-NEXT: .LBB71_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
@@ -7999,10 +8227,10 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_15
-; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_13: # %else12
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.13: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB71_16
-; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_14: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
@@ -8013,7 +8241,8 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
-; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB71_13
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB71_14
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
@@ -8093,15 +8322,16 @@ define void @mscatter_v2f32(<2 x float> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB73_3
-; RV64ZVE32F-NEXT: .LBB73_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB73_4
-; RV64ZVE32F-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-NEXT: .LBB73_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB73_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
-; RV64ZVE32F-NEXT: j .LBB73_1
+; RV64ZVE32F-NEXT: andi a2, a2, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB73_2
; RV64ZVE32F-NEXT: .LBB73_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -8135,7 +8365,7 @@ define void @mscatter_v4f32(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB74_5
-; RV64ZVE32F-NEXT: .LBB74_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB74_6
; RV64ZVE32F-NEXT: .LBB74_2: # %else2
@@ -8144,23 +8374,26 @@ define void @mscatter_v4f32(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB74_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB74_8
-; RV64ZVE32F-NEXT: # %bb.4: # %else6
+; RV64ZVE32F-NEXT: .LBB74_4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB74_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
-; RV64ZVE32F-NEXT: j .LBB74_1
+; RV64ZVE32F-NEXT: andi a0, a3, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB74_2
; RV64ZVE32F-NEXT: .LBB74_6: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse32.v v9, (a4)
-; RV64ZVE32F-NEXT: j .LBB74_2
+; RV64ZVE32F-NEXT: andi a0, a3, 4
+; RV64ZVE32F-NEXT: beqz a0, .LBB74_3
; RV64ZVE32F-NEXT: .LBB74_7: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse32.v v9, (a2)
-; RV64ZVE32F-NEXT: j .LBB74_3
+; RV64ZVE32F-NEXT: andi a3, a3, 8
+; RV64ZVE32F-NEXT: beqz a3, .LBB74_4
; RV64ZVE32F-NEXT: .LBB74_8: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
@@ -8238,7 +8471,7 @@ define void @mscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB77_9
-; RV64ZVE32F-NEXT: .LBB77_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB77_10
; RV64ZVE32F-NEXT: .LBB77_2: # %else2
@@ -8259,46 +8492,53 @@ define void @mscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB77_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB77_16
-; RV64ZVE32F-NEXT: # %bb.8: # %else14
+; RV64ZVE32F-NEXT: .LBB77_8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB77_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
-; RV64ZVE32F-NEXT: j .LBB77_1
+; RV64ZVE32F-NEXT: andi a0, a4, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB77_2
; RV64ZVE32F-NEXT: .LBB77_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vse32.v v10, (t0)
-; RV64ZVE32F-NEXT: j .LBB77_2
+; RV64ZVE32F-NEXT: andi a0, a4, 4
+; RV64ZVE32F-NEXT: beqz a0, .LBB77_3
; RV64ZVE32F-NEXT: .LBB77_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV64ZVE32F-NEXT: vse32.v v10, (a7)
-; RV64ZVE32F-NEXT: j .LBB77_3
+; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: beqz a0, .LBB77_4
; RV64ZVE32F-NEXT: .LBB77_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a6)
-; RV64ZVE32F-NEXT: j .LBB77_4
+; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: beqz a0, .LBB77_5
; RV64ZVE32F-NEXT: .LBB77_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a5)
-; RV64ZVE32F-NEXT: j .LBB77_5
+; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: beqz a0, .LBB77_6
; RV64ZVE32F-NEXT: .LBB77_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a3)
-; RV64ZVE32F-NEXT: j .LBB77_6
+; RV64ZVE32F-NEXT: andi a0, a4, 64
+; RV64ZVE32F-NEXT: beqz a0, .LBB77_7
; RV64ZVE32F-NEXT: .LBB77_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: j .LBB77_7
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: beqz a0, .LBB77_8
; RV64ZVE32F-NEXT: .LBB77_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -8407,10 +8647,10 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB78_15
-; RV64ZVE32F-NEXT: .LBB78_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB78_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB78_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB78_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -8420,7 +8660,8 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB78_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB78_14
; RV64ZVE32F-NEXT: .LBB78_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -8535,10 +8776,10 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB79_15
-; RV64ZVE32F-NEXT: .LBB79_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB79_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB79_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB79_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -8548,7 +8789,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB79_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB79_14
; RV64ZVE32F-NEXT: .LBB79_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -8671,10 +8913,10 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB80_15
-; RV64ZVE32F-NEXT: .LBB80_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB80_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB80_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB80_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -8685,7 +8927,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB80_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB80_14
; RV64ZVE32F-NEXT: .LBB80_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -8804,10 +9047,10 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB81_15
-; RV64ZVE32F-NEXT: .LBB81_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB81_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB81_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB81_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -8817,7 +9060,8 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB81_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB81_14
; RV64ZVE32F-NEXT: .LBB81_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -8934,10 +9178,10 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB82_15
-; RV64ZVE32F-NEXT: .LBB82_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB82_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB82_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB82_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -8947,7 +9191,8 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB82_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB82_14
; RV64ZVE32F-NEXT: .LBB82_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -9071,10 +9316,10 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB83_15
-; RV64ZVE32F-NEXT: .LBB83_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB83_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB83_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB83_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -9085,7 +9330,8 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB83_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB83_14
; RV64ZVE32F-NEXT: .LBB83_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -9160,13 +9406,13 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB84_13
-; RV64ZVE32F-NEXT: .LBB84_7: # %else6
+; RV64ZVE32F-NEXT: # %bb.7: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB84_14
; RV64ZVE32F-NEXT: .LBB84_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB84_10
-; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
+; RV64ZVE32F-NEXT: .LBB84_9: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -9181,10 +9427,10 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB84_15
-; RV64ZVE32F-NEXT: .LBB84_11: # %else12
+; RV64ZVE32F-NEXT: # %bb.11: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB84_16
-; RV64ZVE32F-NEXT: # %bb.12: # %else14
+; RV64ZVE32F-NEXT: .LBB84_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB84_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -9194,7 +9440,8 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
-; RV64ZVE32F-NEXT: j .LBB84_7
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: beqz a2, .LBB84_8
; RV64ZVE32F-NEXT: .LBB84_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -9203,7 +9450,9 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB84_8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: bnez a2, .LBB84_9
+; RV64ZVE32F-NEXT: j .LBB84_10
; RV64ZVE32F-NEXT: .LBB84_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -9212,7 +9461,8 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB84_11
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB84_12
; RV64ZVE32F-NEXT: .LBB84_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -9290,16 +9540,17 @@ define void @mscatter_v2f64(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s a0, v0
; RV32ZVE32F-NEXT: andi a1, a0, 1
; RV32ZVE32F-NEXT: bnez a1, .LBB86_3
-; RV32ZVE32F-NEXT: .LBB86_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB86_4
-; RV32ZVE32F-NEXT: # %bb.2: # %else2
+; RV32ZVE32F-NEXT: .LBB86_2: # %else2
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB86_3: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB86_1
+; RV32ZVE32F-NEXT: andi a0, a0, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB86_2
; RV32ZVE32F-NEXT: .LBB86_4: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -9313,14 +9564,15 @@ define void @mscatter_v2f64(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB86_3
-; RV64ZVE32F-NEXT: .LBB86_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB86_4
-; RV64ZVE32F-NEXT: # %bb.2: # %else2
+; RV64ZVE32F-NEXT: .LBB86_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB86_3: # %cond.store
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB86_1
+; RV64ZVE32F-NEXT: andi a2, a2, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB86_2
; RV64ZVE32F-NEXT: .LBB86_4: # %cond.store1
; RV64ZVE32F-NEXT: fsd fa1, 0(a1)
; RV64ZVE32F-NEXT: ret
@@ -9349,7 +9601,7 @@ define void @mscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s a0, v0
; RV32ZVE32F-NEXT: andi a1, a0, 1
; RV32ZVE32F-NEXT: bnez a1, .LBB87_5
-; RV32ZVE32F-NEXT: .LBB87_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a0, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB87_6
; RV32ZVE32F-NEXT: .LBB87_2: # %else2
@@ -9358,25 +9610,28 @@ define void @mscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32ZVE32F-NEXT: .LBB87_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB87_8
-; RV32ZVE32F-NEXT: # %bb.4: # %else6
+; RV32ZVE32F-NEXT: .LBB87_4: # %else6
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB87_5: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB87_1
+; RV32ZVE32F-NEXT: andi a1, a0, 2
+; RV32ZVE32F-NEXT: beqz a1, .LBB87_2
; RV32ZVE32F-NEXT: .LBB87_6: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v9
; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB87_2
+; RV32ZVE32F-NEXT: andi a1, a0, 4
+; RV32ZVE32F-NEXT: beqz a1, .LBB87_3
; RV32ZVE32F-NEXT: .LBB87_7: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v9
; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB87_3
+; RV32ZVE32F-NEXT: andi a0, a0, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB87_4
; RV32ZVE32F-NEXT: .LBB87_8: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
@@ -9393,7 +9648,7 @@ define void @mscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB87_5
-; RV64ZVE32F-NEXT: .LBB87_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB87_6
; RV64ZVE32F-NEXT: .LBB87_2: # %else2
@@ -9402,18 +9657,21 @@ define void @mscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB87_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB87_8
-; RV64ZVE32F-NEXT: # %bb.4: # %else6
+; RV64ZVE32F-NEXT: .LBB87_4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB87_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB87_1
+; RV64ZVE32F-NEXT: andi a0, a3, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB87_2
; RV64ZVE32F-NEXT: .LBB87_6: # %cond.store1
; RV64ZVE32F-NEXT: fsd fa1, 0(a4)
-; RV64ZVE32F-NEXT: j .LBB87_2
+; RV64ZVE32F-NEXT: andi a0, a3, 4
+; RV64ZVE32F-NEXT: beqz a0, .LBB87_3
; RV64ZVE32F-NEXT: .LBB87_7: # %cond.store3
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB87_3
+; RV64ZVE32F-NEXT: andi a3, a3, 8
+; RV64ZVE32F-NEXT: beqz a3, .LBB87_4
; RV64ZVE32F-NEXT: .LBB87_8: # %cond.store5
; RV64ZVE32F-NEXT: fsd fa3, 0(a1)
; RV64ZVE32F-NEXT: ret
@@ -9494,7 +9752,7 @@ define void @mscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s a0, v0
; RV32ZVE32F-NEXT: andi a1, a0, 1
; RV32ZVE32F-NEXT: bnez a1, .LBB90_9
-; RV32ZVE32F-NEXT: .LBB90_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a0, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB90_10
; RV32ZVE32F-NEXT: .LBB90_2: # %else2
@@ -9515,49 +9773,56 @@ define void @mscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32ZVE32F-NEXT: .LBB90_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB90_16
-; RV32ZVE32F-NEXT: # %bb.8: # %else14
+; RV32ZVE32F-NEXT: .LBB90_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB90_9: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB90_1
+; RV32ZVE32F-NEXT: andi a1, a0, 2
+; RV32ZVE32F-NEXT: beqz a1, .LBB90_2
; RV32ZVE32F-NEXT: .LBB90_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB90_2
+; RV32ZVE32F-NEXT: andi a1, a0, 4
+; RV32ZVE32F-NEXT: beqz a1, .LBB90_3
; RV32ZVE32F-NEXT: .LBB90_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB90_3
+; RV32ZVE32F-NEXT: andi a1, a0, 8
+; RV32ZVE32F-NEXT: beqz a1, .LBB90_4
; RV32ZVE32F-NEXT: .LBB90_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB90_4
+; RV32ZVE32F-NEXT: andi a1, a0, 16
+; RV32ZVE32F-NEXT: beqz a1, .LBB90_5
; RV32ZVE32F-NEXT: .LBB90_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB90_5
+; RV32ZVE32F-NEXT: andi a1, a0, 32
+; RV32ZVE32F-NEXT: beqz a1, .LBB90_6
; RV32ZVE32F-NEXT: .LBB90_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB90_6
+; RV32ZVE32F-NEXT: andi a1, a0, 64
+; RV32ZVE32F-NEXT: beqz a1, .LBB90_7
; RV32ZVE32F-NEXT: .LBB90_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB90_7
+; RV32ZVE32F-NEXT: andi a0, a0, -128
+; RV32ZVE32F-NEXT: beqz a0, .LBB90_8
; RV32ZVE32F-NEXT: .LBB90_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -9578,7 +9843,7 @@ define void @mscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB90_9
-; RV64ZVE32F-NEXT: .LBB90_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB90_10
; RV64ZVE32F-NEXT: .LBB90_2: # %else2
@@ -9599,30 +9864,37 @@ define void @mscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB90_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB90_16
-; RV64ZVE32F-NEXT: # %bb.8: # %else14
+; RV64ZVE32F-NEXT: .LBB90_8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB90_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV64ZVE32F-NEXT: j .LBB90_1
+; RV64ZVE32F-NEXT: andi a0, a4, 2
+; RV64ZVE32F-NEXT: beqz a0, .LBB90_2
; RV64ZVE32F-NEXT: .LBB90_10: # %cond.store1
; RV64ZVE32F-NEXT: fsd fa1, 0(t0)
-; RV64ZVE32F-NEXT: j .LBB90_2
+; RV64ZVE32F-NEXT: andi a0, a4, 4
+; RV64ZVE32F-NEXT: beqz a0, .LBB90_3
; RV64ZVE32F-NEXT: .LBB90_11: # %cond.store3
; RV64ZVE32F-NEXT: fsd fa2, 0(a7)
-; RV64ZVE32F-NEXT: j .LBB90_3
+; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: beqz a0, .LBB90_4
; RV64ZVE32F-NEXT: .LBB90_12: # %cond.store5
; RV64ZVE32F-NEXT: fsd fa3, 0(a6)
-; RV64ZVE32F-NEXT: j .LBB90_4
+; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: beqz a0, .LBB90_5
; RV64ZVE32F-NEXT: .LBB90_13: # %cond.store7
; RV64ZVE32F-NEXT: fsd fa4, 0(a5)
-; RV64ZVE32F-NEXT: j .LBB90_5
+; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: beqz a0, .LBB90_6
; RV64ZVE32F-NEXT: .LBB90_14: # %cond.store9
; RV64ZVE32F-NEXT: fsd fa5, 0(a3)
-; RV64ZVE32F-NEXT: j .LBB90_6
+; RV64ZVE32F-NEXT: andi a0, a4, 64
+; RV64ZVE32F-NEXT: beqz a0, .LBB90_7
; RV64ZVE32F-NEXT: .LBB90_15: # %cond.store11
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB90_7
+; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: beqz a0, .LBB90_8
; RV64ZVE32F-NEXT: .LBB90_16: # %cond.store13
; RV64ZVE32F-NEXT: fsd fa7, 0(a1)
; RV64ZVE32F-NEXT: ret
@@ -9659,7 +9931,7 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8>
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB91_9
-; RV32ZVE32F-NEXT: .LBB91_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB91_10
; RV32ZVE32F-NEXT: .LBB91_2: # %else2
@@ -9680,48 +9952,55 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8>
; RV32ZVE32F-NEXT: .LBB91_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB91_16
-; RV32ZVE32F-NEXT: # %bb.8: # %else14
+; RV32ZVE32F-NEXT: .LBB91_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB91_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB91_1
+; RV32ZVE32F-NEXT: andi a0, a1, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB91_2
; RV32ZVE32F-NEXT: .LBB91_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB91_2
+; RV32ZVE32F-NEXT: andi a0, a1, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB91_3
; RV32ZVE32F-NEXT: .LBB91_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB91_3
+; RV32ZVE32F-NEXT: andi a0, a1, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB91_4
; RV32ZVE32F-NEXT: .LBB91_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB91_4
+; RV32ZVE32F-NEXT: andi a0, a1, 16
+; RV32ZVE32F-NEXT: beqz a0, .LBB91_5
; RV32ZVE32F-NEXT: .LBB91_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB91_5
+; RV32ZVE32F-NEXT: andi a0, a1, 32
+; RV32ZVE32F-NEXT: beqz a0, .LBB91_6
; RV32ZVE32F-NEXT: .LBB91_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB91_6
+; RV32ZVE32F-NEXT: andi a0, a1, 64
+; RV32ZVE32F-NEXT: beqz a0, .LBB91_7
; RV32ZVE32F-NEXT: .LBB91_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB91_7
+; RV32ZVE32F-NEXT: andi a0, a1, -128
+; RV32ZVE32F-NEXT: beqz a0, .LBB91_8
; RV32ZVE32F-NEXT: .LBB91_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -9794,17 +10073,18 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8>
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB91_15
-; RV64ZVE32F-NEXT: .LBB91_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB91_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB91_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB91_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB91_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB91_14
; RV64ZVE32F-NEXT: .LBB91_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -9846,7 +10126,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB92_9
-; RV32ZVE32F-NEXT: .LBB92_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB92_10
; RV32ZVE32F-NEXT: .LBB92_2: # %else2
@@ -9867,48 +10147,55 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV32ZVE32F-NEXT: .LBB92_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB92_16
-; RV32ZVE32F-NEXT: # %bb.8: # %else14
+; RV32ZVE32F-NEXT: .LBB92_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB92_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB92_1
+; RV32ZVE32F-NEXT: andi a0, a1, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB92_2
; RV32ZVE32F-NEXT: .LBB92_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB92_2
+; RV32ZVE32F-NEXT: andi a0, a1, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB92_3
; RV32ZVE32F-NEXT: .LBB92_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB92_3
+; RV32ZVE32F-NEXT: andi a0, a1, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB92_4
; RV32ZVE32F-NEXT: .LBB92_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB92_4
+; RV32ZVE32F-NEXT: andi a0, a1, 16
+; RV32ZVE32F-NEXT: beqz a0, .LBB92_5
; RV32ZVE32F-NEXT: .LBB92_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB92_5
+; RV32ZVE32F-NEXT: andi a0, a1, 32
+; RV32ZVE32F-NEXT: beqz a0, .LBB92_6
; RV32ZVE32F-NEXT: .LBB92_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB92_6
+; RV32ZVE32F-NEXT: andi a0, a1, 64
+; RV32ZVE32F-NEXT: beqz a0, .LBB92_7
; RV32ZVE32F-NEXT: .LBB92_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB92_7
+; RV32ZVE32F-NEXT: andi a0, a1, -128
+; RV32ZVE32F-NEXT: beqz a0, .LBB92_8
; RV32ZVE32F-NEXT: .LBB92_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -9981,17 +10268,18 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB92_15
-; RV64ZVE32F-NEXT: .LBB92_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB92_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB92_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB92_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB92_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB92_14
; RV64ZVE32F-NEXT: .LBB92_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -10035,7 +10323,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB93_9
-; RV32ZVE32F-NEXT: .LBB93_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB93_10
; RV32ZVE32F-NEXT: .LBB93_2: # %else2
@@ -10056,48 +10344,55 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV32ZVE32F-NEXT: .LBB93_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB93_16
-; RV32ZVE32F-NEXT: # %bb.8: # %else14
+; RV32ZVE32F-NEXT: .LBB93_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB93_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB93_1
+; RV32ZVE32F-NEXT: andi a0, a1, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB93_2
; RV32ZVE32F-NEXT: .LBB93_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB93_2
+; RV32ZVE32F-NEXT: andi a0, a1, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB93_3
; RV32ZVE32F-NEXT: .LBB93_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB93_3
+; RV32ZVE32F-NEXT: andi a0, a1, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB93_4
; RV32ZVE32F-NEXT: .LBB93_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB93_4
+; RV32ZVE32F-NEXT: andi a0, a1, 16
+; RV32ZVE32F-NEXT: beqz a0, .LBB93_5
; RV32ZVE32F-NEXT: .LBB93_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB93_5
+; RV32ZVE32F-NEXT: andi a0, a1, 32
+; RV32ZVE32F-NEXT: beqz a0, .LBB93_6
; RV32ZVE32F-NEXT: .LBB93_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB93_6
+; RV32ZVE32F-NEXT: andi a0, a1, 64
+; RV32ZVE32F-NEXT: beqz a0, .LBB93_7
; RV32ZVE32F-NEXT: .LBB93_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB93_7
+; RV32ZVE32F-NEXT: andi a0, a1, -128
+; RV32ZVE32F-NEXT: beqz a0, .LBB93_8
; RV32ZVE32F-NEXT: .LBB93_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -10176,10 +10471,10 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB93_15
-; RV64ZVE32F-NEXT: .LBB93_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB93_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB93_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB93_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -10187,7 +10482,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB93_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB93_14
; RV64ZVE32F-NEXT: .LBB93_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -10231,7 +10527,7 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccus.vx v10, a1, v8
; RV32ZVE32F-NEXT: bnez a2, .LBB94_9
-; RV32ZVE32F-NEXT: .LBB94_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a0, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB94_10
; RV32ZVE32F-NEXT: .LBB94_2: # %else2
@@ -10252,49 +10548,56 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16
; RV32ZVE32F-NEXT: .LBB94_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB94_16
-; RV32ZVE32F-NEXT: # %bb.8: # %else14
+; RV32ZVE32F-NEXT: .LBB94_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB94_9: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB94_1
+; RV32ZVE32F-NEXT: andi a1, a0, 2
+; RV32ZVE32F-NEXT: beqz a1, .LBB94_2
; RV32ZVE32F-NEXT: .LBB94_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB94_2
+; RV32ZVE32F-NEXT: andi a1, a0, 4
+; RV32ZVE32F-NEXT: beqz a1, .LBB94_3
; RV32ZVE32F-NEXT: .LBB94_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB94_3
+; RV32ZVE32F-NEXT: andi a1, a0, 8
+; RV32ZVE32F-NEXT: beqz a1, .LBB94_4
; RV32ZVE32F-NEXT: .LBB94_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB94_4
+; RV32ZVE32F-NEXT: andi a1, a0, 16
+; RV32ZVE32F-NEXT: beqz a1, .LBB94_5
; RV32ZVE32F-NEXT: .LBB94_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB94_5
+; RV32ZVE32F-NEXT: andi a1, a0, 32
+; RV32ZVE32F-NEXT: beqz a1, .LBB94_6
; RV32ZVE32F-NEXT: .LBB94_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB94_6
+; RV32ZVE32F-NEXT: andi a1, a0, 64
+; RV32ZVE32F-NEXT: beqz a1, .LBB94_7
; RV32ZVE32F-NEXT: .LBB94_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB94_7
+; RV32ZVE32F-NEXT: andi a0, a0, -128
+; RV32ZVE32F-NEXT: beqz a0, .LBB94_8
; RV32ZVE32F-NEXT: .LBB94_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
@@ -10368,17 +10671,18 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB94_15
-; RV64ZVE32F-NEXT: .LBB94_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB94_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB94_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB94_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB94_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB94_14
; RV64ZVE32F-NEXT: .LBB94_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -10420,7 +10724,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccus.vx v10, a1, v8
; RV32ZVE32F-NEXT: bnez a2, .LBB95_9
-; RV32ZVE32F-NEXT: .LBB95_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a0, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB95_10
; RV32ZVE32F-NEXT: .LBB95_2: # %else2
@@ -10441,49 +10745,56 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: .LBB95_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB95_16
-; RV32ZVE32F-NEXT: # %bb.8: # %else14
+; RV32ZVE32F-NEXT: .LBB95_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB95_9: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB95_1
+; RV32ZVE32F-NEXT: andi a1, a0, 2
+; RV32ZVE32F-NEXT: beqz a1, .LBB95_2
; RV32ZVE32F-NEXT: .LBB95_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB95_2
+; RV32ZVE32F-NEXT: andi a1, a0, 4
+; RV32ZVE32F-NEXT: beqz a1, .LBB95_3
; RV32ZVE32F-NEXT: .LBB95_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB95_3
+; RV32ZVE32F-NEXT: andi a1, a0, 8
+; RV32ZVE32F-NEXT: beqz a1, .LBB95_4
; RV32ZVE32F-NEXT: .LBB95_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB95_4
+; RV32ZVE32F-NEXT: andi a1, a0, 16
+; RV32ZVE32F-NEXT: beqz a1, .LBB95_5
; RV32ZVE32F-NEXT: .LBB95_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB95_5
+; RV32ZVE32F-NEXT: andi a1, a0, 32
+; RV32ZVE32F-NEXT: beqz a1, .LBB95_6
; RV32ZVE32F-NEXT: .LBB95_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB95_6
+; RV32ZVE32F-NEXT: andi a1, a0, 64
+; RV32ZVE32F-NEXT: beqz a1, .LBB95_7
; RV32ZVE32F-NEXT: .LBB95_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB95_7
+; RV32ZVE32F-NEXT: andi a0, a0, -128
+; RV32ZVE32F-NEXT: beqz a0, .LBB95_8
; RV32ZVE32F-NEXT: .LBB95_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
@@ -10557,17 +10868,18 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB95_15
-; RV64ZVE32F-NEXT: .LBB95_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB95_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB95_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB95_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB95_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB95_14
; RV64ZVE32F-NEXT: .LBB95_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -10611,7 +10923,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32ZVE32F-NEXT: vwmaccu.vx v10, a1, v8
; RV32ZVE32F-NEXT: bnez a2, .LBB96_9
-; RV32ZVE32F-NEXT: .LBB96_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a0, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB96_10
; RV32ZVE32F-NEXT: .LBB96_2: # %else2
@@ -10632,49 +10944,56 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: .LBB96_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB96_16
-; RV32ZVE32F-NEXT: # %bb.8: # %else14
+; RV32ZVE32F-NEXT: .LBB96_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB96_9: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB96_1
+; RV32ZVE32F-NEXT: andi a1, a0, 2
+; RV32ZVE32F-NEXT: beqz a1, .LBB96_2
; RV32ZVE32F-NEXT: .LBB96_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB96_2
+; RV32ZVE32F-NEXT: andi a1, a0, 4
+; RV32ZVE32F-NEXT: beqz a1, .LBB96_3
; RV32ZVE32F-NEXT: .LBB96_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB96_3
+; RV32ZVE32F-NEXT: andi a1, a0, 8
+; RV32ZVE32F-NEXT: beqz a1, .LBB96_4
; RV32ZVE32F-NEXT: .LBB96_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB96_4
+; RV32ZVE32F-NEXT: andi a1, a0, 16
+; RV32ZVE32F-NEXT: beqz a1, .LBB96_5
; RV32ZVE32F-NEXT: .LBB96_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB96_5
+; RV32ZVE32F-NEXT: andi a1, a0, 32
+; RV32ZVE32F-NEXT: beqz a1, .LBB96_6
; RV32ZVE32F-NEXT: .LBB96_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB96_6
+; RV32ZVE32F-NEXT: andi a1, a0, 64
+; RV32ZVE32F-NEXT: beqz a1, .LBB96_7
; RV32ZVE32F-NEXT: .LBB96_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
-; RV32ZVE32F-NEXT: j .LBB96_7
+; RV32ZVE32F-NEXT: andi a0, a0, -128
+; RV32ZVE32F-NEXT: beqz a0, .LBB96_8
; RV32ZVE32F-NEXT: .LBB96_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7
@@ -10754,10 +11073,10 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB96_15
-; RV64ZVE32F-NEXT: .LBB96_13: # %else12
+; RV64ZVE32F-NEXT: # %bb.13: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB96_16
-; RV64ZVE32F-NEXT: # %bb.14: # %else14
+; RV64ZVE32F-NEXT: .LBB96_14: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB96_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -10765,7 +11084,8 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: srli a2, a2, 45
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB96_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB96_14
; RV64ZVE32F-NEXT: .LBB96_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -10808,7 +11128,7 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB97_9
-; RV32ZVE32F-NEXT: .LBB97_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB97_10
; RV32ZVE32F-NEXT: .LBB97_2: # %else2
@@ -10829,48 +11149,55 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32
; RV32ZVE32F-NEXT: .LBB97_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB97_16
-; RV32ZVE32F-NEXT: # %bb.8: # %else14
+; RV32ZVE32F-NEXT: .LBB97_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB97_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB97_1
+; RV32ZVE32F-NEXT: andi a0, a1, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB97_2
; RV32ZVE32F-NEXT: .LBB97_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB97_2
+; RV32ZVE32F-NEXT: andi a0, a1, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB97_3
; RV32ZVE32F-NEXT: .LBB97_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB97_3
+; RV32ZVE32F-NEXT: andi a0, a1, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB97_4
; RV32ZVE32F-NEXT: .LBB97_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB97_4
+; RV32ZVE32F-NEXT: andi a0, a1, 16
+; RV32ZVE32F-NEXT: beqz a0, .LBB97_5
; RV32ZVE32F-NEXT: .LBB97_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB97_5
+; RV32ZVE32F-NEXT: andi a0, a1, 32
+; RV32ZVE32F-NEXT: beqz a0, .LBB97_6
; RV32ZVE32F-NEXT: .LBB97_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB97_6
+; RV32ZVE32F-NEXT: andi a0, a1, 64
+; RV32ZVE32F-NEXT: beqz a0, .LBB97_7
; RV32ZVE32F-NEXT: .LBB97_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB97_7
+; RV32ZVE32F-NEXT: andi a0, a1, -128
+; RV32ZVE32F-NEXT: beqz a0, .LBB97_8
; RV32ZVE32F-NEXT: .LBB97_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -10915,13 +11242,13 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB97_13
-; RV64ZVE32F-NEXT: .LBB97_7: # %else6
+; RV64ZVE32F-NEXT: # %bb.7: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB97_14
; RV64ZVE32F-NEXT: .LBB97_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB97_10
-; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
+; RV64ZVE32F-NEXT: .LBB97_9: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -10933,10 +11260,10 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB97_15
-; RV64ZVE32F-NEXT: .LBB97_11: # %else12
+; RV64ZVE32F-NEXT: # %bb.11: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB97_16
-; RV64ZVE32F-NEXT: # %bb.12: # %else14
+; RV64ZVE32F-NEXT: .LBB97_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB97_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -10945,20 +11272,24 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB97_7
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: beqz a2, .LBB97_8
; RV64ZVE32F-NEXT: .LBB97_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB97_8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: bnez a2, .LBB97_9
+; RV64ZVE32F-NEXT: j .LBB97_10
; RV64ZVE32F-NEXT: .LBB97_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB97_11
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB97_12
; RV64ZVE32F-NEXT: .LBB97_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -10999,7 +11330,7 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB98_9
-; RV32ZVE32F-NEXT: .LBB98_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB98_10
; RV32ZVE32F-NEXT: .LBB98_2: # %else2
@@ -11020,48 +11351,55 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: .LBB98_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB98_16
-; RV32ZVE32F-NEXT: # %bb.8: # %else14
+; RV32ZVE32F-NEXT: .LBB98_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB98_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB98_1
+; RV32ZVE32F-NEXT: andi a0, a1, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB98_2
; RV32ZVE32F-NEXT: .LBB98_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB98_2
+; RV32ZVE32F-NEXT: andi a0, a1, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB98_3
; RV32ZVE32F-NEXT: .LBB98_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB98_3
+; RV32ZVE32F-NEXT: andi a0, a1, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB98_4
; RV32ZVE32F-NEXT: .LBB98_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB98_4
+; RV32ZVE32F-NEXT: andi a0, a1, 16
+; RV32ZVE32F-NEXT: beqz a0, .LBB98_5
; RV32ZVE32F-NEXT: .LBB98_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB98_5
+; RV32ZVE32F-NEXT: andi a0, a1, 32
+; RV32ZVE32F-NEXT: beqz a0, .LBB98_6
; RV32ZVE32F-NEXT: .LBB98_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB98_6
+; RV32ZVE32F-NEXT: andi a0, a1, 64
+; RV32ZVE32F-NEXT: beqz a0, .LBB98_7
; RV32ZVE32F-NEXT: .LBB98_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB98_7
+; RV32ZVE32F-NEXT: andi a0, a1, -128
+; RV32ZVE32F-NEXT: beqz a0, .LBB98_8
; RV32ZVE32F-NEXT: .LBB98_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -11106,13 +11444,13 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB98_13
-; RV64ZVE32F-NEXT: .LBB98_7: # %else6
+; RV64ZVE32F-NEXT: # %bb.7: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB98_14
; RV64ZVE32F-NEXT: .LBB98_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB98_10
-; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
+; RV64ZVE32F-NEXT: .LBB98_9: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -11124,10 +11462,10 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB98_15
-; RV64ZVE32F-NEXT: .LBB98_11: # %else12
+; RV64ZVE32F-NEXT: # %bb.11: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB98_16
-; RV64ZVE32F-NEXT: # %bb.12: # %else14
+; RV64ZVE32F-NEXT: .LBB98_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB98_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -11136,20 +11474,24 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB98_7
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: beqz a2, .LBB98_8
; RV64ZVE32F-NEXT: .LBB98_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB98_8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: bnez a2, .LBB98_9
+; RV64ZVE32F-NEXT: j .LBB98_10
; RV64ZVE32F-NEXT: .LBB98_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB98_11
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB98_12
; RV64ZVE32F-NEXT: .LBB98_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -11191,7 +11533,7 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB99_9
-; RV32ZVE32F-NEXT: .LBB99_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB99_10
; RV32ZVE32F-NEXT: .LBB99_2: # %else2
@@ -11212,48 +11554,55 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: .LBB99_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB99_16
-; RV32ZVE32F-NEXT: # %bb.8: # %else14
+; RV32ZVE32F-NEXT: .LBB99_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB99_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB99_1
+; RV32ZVE32F-NEXT: andi a0, a1, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB99_2
; RV32ZVE32F-NEXT: .LBB99_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB99_2
+; RV32ZVE32F-NEXT: andi a0, a1, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB99_3
; RV32ZVE32F-NEXT: .LBB99_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB99_3
+; RV32ZVE32F-NEXT: andi a0, a1, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB99_4
; RV32ZVE32F-NEXT: .LBB99_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB99_4
+; RV32ZVE32F-NEXT: andi a0, a1, 16
+; RV32ZVE32F-NEXT: beqz a0, .LBB99_5
; RV32ZVE32F-NEXT: .LBB99_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB99_5
+; RV32ZVE32F-NEXT: andi a0, a1, 32
+; RV32ZVE32F-NEXT: beqz a0, .LBB99_6
; RV32ZVE32F-NEXT: .LBB99_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB99_6
+; RV32ZVE32F-NEXT: andi a0, a1, 64
+; RV32ZVE32F-NEXT: beqz a0, .LBB99_7
; RV32ZVE32F-NEXT: .LBB99_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB99_7
+; RV32ZVE32F-NEXT: andi a0, a1, -128
+; RV32ZVE32F-NEXT: beqz a0, .LBB99_8
; RV32ZVE32F-NEXT: .LBB99_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -11301,13 +11650,13 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB99_13
-; RV64ZVE32F-NEXT: .LBB99_7: # %else6
+; RV64ZVE32F-NEXT: # %bb.7: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB99_14
; RV64ZVE32F-NEXT: .LBB99_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB99_10
-; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
+; RV64ZVE32F-NEXT: .LBB99_9: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -11320,10 +11669,10 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB99_15
-; RV64ZVE32F-NEXT: .LBB99_11: # %else12
+; RV64ZVE32F-NEXT: # %bb.11: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB99_16
-; RV64ZVE32F-NEXT: # %bb.12: # %else14
+; RV64ZVE32F-NEXT: .LBB99_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB99_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -11333,7 +11682,8 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB99_7
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: beqz a2, .LBB99_8
; RV64ZVE32F-NEXT: .LBB99_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -11341,14 +11691,17 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB99_8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: bnez a2, .LBB99_9
+; RV64ZVE32F-NEXT: j .LBB99_10
; RV64ZVE32F-NEXT: .LBB99_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: j .LBB99_11
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB99_12
; RV64ZVE32F-NEXT: .LBB99_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -11406,7 +11759,7 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB100_9
-; RV32ZVE32F-NEXT: .LBB100_1: # %else
+; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB100_10
; RV32ZVE32F-NEXT: .LBB100_2: # %else2
@@ -11427,48 +11780,55 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx
; RV32ZVE32F-NEXT: .LBB100_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB100_16
-; RV32ZVE32F-NEXT: # %bb.8: # %else14
+; RV32ZVE32F-NEXT: .LBB100_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB100_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB100_1
+; RV32ZVE32F-NEXT: andi a0, a1, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB100_2
; RV32ZVE32F-NEXT: .LBB100_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB100_2
+; RV32ZVE32F-NEXT: andi a0, a1, 4
+; RV32ZVE32F-NEXT: beqz a0, .LBB100_3
; RV32ZVE32F-NEXT: .LBB100_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB100_3
+; RV32ZVE32F-NEXT: andi a0, a1, 8
+; RV32ZVE32F-NEXT: beqz a0, .LBB100_4
; RV32ZVE32F-NEXT: .LBB100_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB100_4
+; RV32ZVE32F-NEXT: andi a0, a1, 16
+; RV32ZVE32F-NEXT: beqz a0, .LBB100_5
; RV32ZVE32F-NEXT: .LBB100_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB100_5
+; RV32ZVE32F-NEXT: andi a0, a1, 32
+; RV32ZVE32F-NEXT: beqz a0, .LBB100_6
; RV32ZVE32F-NEXT: .LBB100_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB100_6
+; RV32ZVE32F-NEXT: andi a0, a1, 64
+; RV32ZVE32F-NEXT: beqz a0, .LBB100_7
; RV32ZVE32F-NEXT: .LBB100_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
-; RV32ZVE32F-NEXT: j .LBB100_7
+; RV32ZVE32F-NEXT: andi a0, a1, -128
+; RV32ZVE32F-NEXT: beqz a0, .LBB100_8
; RV32ZVE32F-NEXT: .LBB100_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -11489,7 +11849,7 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi t2, a3, 1
; RV64ZVE32F-NEXT: bnez t2, .LBB100_9
-; RV64ZVE32F-NEXT: .LBB100_1: # %else
+; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a1, a3, 2
; RV64ZVE32F-NEXT: bnez a1, .LBB100_10
; RV64ZVE32F-NEXT: .LBB100_2: # %else2
@@ -11510,44 +11870,51 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx
; RV64ZVE32F-NEXT: .LBB100_7: # %else12
; RV64ZVE32F-NEXT: andi a1, a3, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB100_16
-; RV64ZVE32F-NEXT: # %bb.8: # %else14
+; RV64ZVE32F-NEXT: .LBB100_8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB100_9: # %cond.store
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: slli a1, a1, 3
; RV64ZVE32F-NEXT: add a1, a0, a1
; RV64ZVE32F-NEXT: fsd fa0, 0(a1)
-; RV64ZVE32F-NEXT: j .LBB100_1
+; RV64ZVE32F-NEXT: andi a1, a3, 2
+; RV64ZVE32F-NEXT: beqz a1, .LBB100_2
; RV64ZVE32F-NEXT: .LBB100_10: # %cond.store1
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a0, t1
; RV64ZVE32F-NEXT: fsd fa1, 0(t1)
-; RV64ZVE32F-NEXT: j .LBB100_2
+; RV64ZVE32F-NEXT: andi a1, a3, 4
+; RV64ZVE32F-NEXT: beqz a1, .LBB100_3
; RV64ZVE32F-NEXT: .LBB100_11: # %cond.store3
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a0, t0
; RV64ZVE32F-NEXT: fsd fa2, 0(t0)
-; RV64ZVE32F-NEXT: j .LBB100_3
+; RV64ZVE32F-NEXT: andi a1, a3, 8
+; RV64ZVE32F-NEXT: beqz a1, .LBB100_4
; RV64ZVE32F-NEXT: .LBB100_12: # %cond.store5
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a0, a7
; RV64ZVE32F-NEXT: fsd fa3, 0(a7)
-; RV64ZVE32F-NEXT: j .LBB100_4
+; RV64ZVE32F-NEXT: andi a1, a3, 16
+; RV64ZVE32F-NEXT: beqz a1, .LBB100_5
; RV64ZVE32F-NEXT: .LBB100_13: # %cond.store7
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a0, a6
; RV64ZVE32F-NEXT: fsd fa4, 0(a6)
-; RV64ZVE32F-NEXT: j .LBB100_5
+; RV64ZVE32F-NEXT: andi a1, a3, 32
+; RV64ZVE32F-NEXT: beqz a1, .LBB100_6
; RV64ZVE32F-NEXT: .LBB100_14: # %cond.store9
; RV64ZVE32F-NEXT: slli a5, a5, 3
; RV64ZVE32F-NEXT: add a5, a0, a5
; RV64ZVE32F-NEXT: fsd fa5, 0(a5)
-; RV64ZVE32F-NEXT: j .LBB100_6
+; RV64ZVE32F-NEXT: andi a1, a3, 64
+; RV64ZVE32F-NEXT: beqz a1, .LBB100_7
; RV64ZVE32F-NEXT: .LBB100_15: # %cond.store11
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a0, a4
; RV64ZVE32F-NEXT: fsd fa6, 0(a4)
-; RV64ZVE32F-NEXT: j .LBB100_7
+; RV64ZVE32F-NEXT: andi a1, a3, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB100_8
; RV64ZVE32F-NEXT: .LBB100_16: # %cond.store13
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a0, a0, a2
@@ -11960,13 +12327,13 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 16
; RV64ZVE32F-NEXT: bltz a2, .LBB102_63
-; RV64ZVE32F-NEXT: .LBB102_31: # %else30
+; RV64ZVE32F-NEXT: # %bb.31: # %else30
; RV64ZVE32F-NEXT: slli a2, a1, 47
; RV64ZVE32F-NEXT: bltz a2, .LBB102_64
; RV64ZVE32F-NEXT: .LBB102_32: # %else32
; RV64ZVE32F-NEXT: slli a2, a1, 46
; RV64ZVE32F-NEXT: bgez a2, .LBB102_34
-; RV64ZVE32F-NEXT: # %bb.33: # %cond.store33
+; RV64ZVE32F-NEXT: .LBB102_33: # %cond.store33
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -12154,7 +12521,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 15
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
-; RV64ZVE32F-NEXT: j .LBB102_31
+; RV64ZVE32F-NEXT: slli a2, a1, 47
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_32
; RV64ZVE32F-NEXT: .LBB102_64: # %cond.store31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -12162,7 +12530,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: j .LBB102_32
+; RV64ZVE32F-NEXT: slli a2, a1, 46
+; RV64ZVE32F-NEXT: bltz a2, .LBB102_33
+; RV64ZVE32F-NEXT: j .LBB102_34
%ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> %val, <32 x ptr> %ptrs, i32 1, <32 x i1> %m)
ret void
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
index 4dd07fdac72b0..29fbb8acc3358 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -253,7 +253,7 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV32-SLOW-NEXT: vmv.x.s a0, v0
; RV32-SLOW-NEXT: andi a1, a0, 1
; RV32-SLOW-NEXT: bnez a1, .LBB6_5
-; RV32-SLOW-NEXT: .LBB6_1: # %else
+; RV32-SLOW-NEXT: # %bb.1: # %else
; RV32-SLOW-NEXT: andi a1, a0, 2
; RV32-SLOW-NEXT: bnez a1, .LBB6_6
; RV32-SLOW-NEXT: .LBB6_2: # %else2
@@ -262,7 +262,7 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV32-SLOW-NEXT: .LBB6_3: # %else4
; RV32-SLOW-NEXT: andi a0, a0, 8
; RV32-SLOW-NEXT: bnez a0, .LBB6_8
-; RV32-SLOW-NEXT: # %bb.4: # %else6
+; RV32-SLOW-NEXT: .LBB6_4: # %else6
; RV32-SLOW-NEXT: ret
; RV32-SLOW-NEXT: .LBB6_5: # %cond.store
; RV32-SLOW-NEXT: vsetvli zero, zero, e16, m2, ta, ma
@@ -272,7 +272,8 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV32-SLOW-NEXT: srli a3, a1, 8
; RV32-SLOW-NEXT: sb a1, 0(a2)
; RV32-SLOW-NEXT: sb a3, 1(a2)
-; RV32-SLOW-NEXT: j .LBB6_1
+; RV32-SLOW-NEXT: andi a1, a0, 2
+; RV32-SLOW-NEXT: beqz a1, .LBB6_2
; RV32-SLOW-NEXT: .LBB6_6: # %cond.store1
; RV32-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV32-SLOW-NEXT: vslidedown.vi v10, v8, 1
@@ -283,7 +284,8 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV32-SLOW-NEXT: srli a3, a1, 8
; RV32-SLOW-NEXT: sb a1, 0(a2)
; RV32-SLOW-NEXT: sb a3, 1(a2)
-; RV32-SLOW-NEXT: j .LBB6_2
+; RV32-SLOW-NEXT: andi a1, a0, 4
+; RV32-SLOW-NEXT: beqz a1, .LBB6_3
; RV32-SLOW-NEXT: .LBB6_7: # %cond.store3
; RV32-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV32-SLOW-NEXT: vslidedown.vi v10, v8, 2
@@ -294,7 +296,8 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV32-SLOW-NEXT: srli a3, a1, 8
; RV32-SLOW-NEXT: sb a1, 0(a2)
; RV32-SLOW-NEXT: sb a3, 1(a2)
-; RV32-SLOW-NEXT: j .LBB6_3
+; RV32-SLOW-NEXT: andi a0, a0, 8
+; RV32-SLOW-NEXT: beqz a0, .LBB6_4
; RV32-SLOW-NEXT: .LBB6_8: # %cond.store5
; RV32-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV32-SLOW-NEXT: vslidedown.vi v8, v8, 3
@@ -315,7 +318,7 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV64-SLOW-NEXT: vmv.x.s a0, v0
; RV64-SLOW-NEXT: andi a1, a0, 1
; RV64-SLOW-NEXT: bnez a1, .LBB6_5
-; RV64-SLOW-NEXT: .LBB6_1: # %else
+; RV64-SLOW-NEXT: # %bb.1: # %else
; RV64-SLOW-NEXT: andi a1, a0, 2
; RV64-SLOW-NEXT: bnez a1, .LBB6_6
; RV64-SLOW-NEXT: .LBB6_2: # %else2
@@ -324,7 +327,7 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV64-SLOW-NEXT: .LBB6_3: # %else4
; RV64-SLOW-NEXT: andi a0, a0, 8
; RV64-SLOW-NEXT: bnez a0, .LBB6_8
-; RV64-SLOW-NEXT: # %bb.4: # %else6
+; RV64-SLOW-NEXT: .LBB6_4: # %else6
; RV64-SLOW-NEXT: ret
; RV64-SLOW-NEXT: .LBB6_5: # %cond.store
; RV64-SLOW-NEXT: vsetvli zero, zero, e16, m2, ta, ma
@@ -334,7 +337,8 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV64-SLOW-NEXT: srli a3, a1, 8
; RV64-SLOW-NEXT: sb a1, 0(a2)
; RV64-SLOW-NEXT: sb a3, 1(a2)
-; RV64-SLOW-NEXT: j .LBB6_1
+; RV64-SLOW-NEXT: andi a1, a0, 2
+; RV64-SLOW-NEXT: beqz a1, .LBB6_2
; RV64-SLOW-NEXT: .LBB6_6: # %cond.store1
; RV64-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64-SLOW-NEXT: vslidedown.vi v9, v8, 1
@@ -345,7 +349,8 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV64-SLOW-NEXT: srli a3, a1, 8
; RV64-SLOW-NEXT: sb a1, 0(a2)
; RV64-SLOW-NEXT: sb a3, 1(a2)
-; RV64-SLOW-NEXT: j .LBB6_2
+; RV64-SLOW-NEXT: andi a1, a0, 4
+; RV64-SLOW-NEXT: beqz a1, .LBB6_3
; RV64-SLOW-NEXT: .LBB6_7: # %cond.store3
; RV64-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64-SLOW-NEXT: vslidedown.vi v9, v8, 2
@@ -356,7 +361,8 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV64-SLOW-NEXT: srli a3, a1, 8
; RV64-SLOW-NEXT: sb a1, 0(a2)
; RV64-SLOW-NEXT: sb a3, 1(a2)
-; RV64-SLOW-NEXT: j .LBB6_3
+; RV64-SLOW-NEXT: andi a0, a0, 8
+; RV64-SLOW-NEXT: beqz a0, .LBB6_4
; RV64-SLOW-NEXT: .LBB6_8: # %cond.store5
; RV64-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64-SLOW-NEXT: vslidedown.vi v12, v8, 3
@@ -395,10 +401,10 @@ define void @mscatter_v2i32_align2(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m)
; RV32-SLOW-NEXT: vmv.x.s a0, v0
; RV32-SLOW-NEXT: andi a1, a0, 1
; RV32-SLOW-NEXT: bnez a1, .LBB7_3
-; RV32-SLOW-NEXT: .LBB7_1: # %else
+; RV32-SLOW-NEXT: # %bb.1: # %else
; RV32-SLOW-NEXT: andi a0, a0, 2
; RV32-SLOW-NEXT: bnez a0, .LBB7_4
-; RV32-SLOW-NEXT: # %bb.2: # %else2
+; RV32-SLOW-NEXT: .LBB7_2: # %else2
; RV32-SLOW-NEXT: ret
; RV32-SLOW-NEXT: .LBB7_3: # %cond.store
; RV32-SLOW-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -407,7 +413,8 @@ define void @mscatter_v2i32_align2(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m)
; RV32-SLOW-NEXT: srli a3, a1, 16
; RV32-SLOW-NEXT: sh a1, 0(a2)
; RV32-SLOW-NEXT: sh a3, 2(a2)
-; RV32-SLOW-NEXT: j .LBB7_1
+; RV32-SLOW-NEXT: andi a0, a0, 2
+; RV32-SLOW-NEXT: beqz a0, .LBB7_2
; RV32-SLOW-NEXT: .LBB7_4: # %cond.store1
; RV32-SLOW-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV32-SLOW-NEXT: vslidedown.vi v8, v8, 1
@@ -425,10 +432,10 @@ define void @mscatter_v2i32_align2(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m)
; RV64-SLOW-NEXT: vmv.x.s a0, v0
; RV64-SLOW-NEXT: andi a1, a0, 1
; RV64-SLOW-NEXT: bnez a1, .LBB7_3
-; RV64-SLOW-NEXT: .LBB7_1: # %else
+; RV64-SLOW-NEXT: # %bb.1: # %else
; RV64-SLOW-NEXT: andi a0, a0, 2
; RV64-SLOW-NEXT: bnez a0, .LBB7_4
-; RV64-SLOW-NEXT: # %bb.2: # %else2
+; RV64-SLOW-NEXT: .LBB7_2: # %else2
; RV64-SLOW-NEXT: ret
; RV64-SLOW-NEXT: .LBB7_3: # %cond.store
; RV64-SLOW-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -438,7 +445,8 @@ define void @mscatter_v2i32_align2(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m)
; RV64-SLOW-NEXT: srli a3, a1, 16
; RV64-SLOW-NEXT: sh a1, 0(a2)
; RV64-SLOW-NEXT: sh a3, 2(a2)
-; RV64-SLOW-NEXT: j .LBB7_1
+; RV64-SLOW-NEXT: andi a0, a0, 2
+; RV64-SLOW-NEXT: beqz a0, .LBB7_2
; RV64-SLOW-NEXT: .LBB7_4: # %cond.store1
; RV64-SLOW-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV64-SLOW-NEXT: vslidedown.vi v8, v8, 1
@@ -583,10 +591,10 @@ define void @masked_store_v2i32_align2(<2 x i32> %val, ptr %a, <2 x i32> %m) nou
; SLOW-NEXT: vmv.x.s a1, v9
; SLOW-NEXT: andi a2, a1, 1
; SLOW-NEXT: bnez a2, .LBB9_3
-; SLOW-NEXT: .LBB9_1: # %else
+; SLOW-NEXT: # %bb.1: # %else
; SLOW-NEXT: andi a1, a1, 2
; SLOW-NEXT: bnez a1, .LBB9_4
-; SLOW-NEXT: # %bb.2: # %else2
+; SLOW-NEXT: .LBB9_2: # %else2
; SLOW-NEXT: ret
; SLOW-NEXT: .LBB9_3: # %cond.store
; SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
@@ -594,7 +602,8 @@ define void @masked_store_v2i32_align2(<2 x i32> %val, ptr %a, <2 x i32> %m) nou
; SLOW-NEXT: srli a3, a2, 16
; SLOW-NEXT: sh a2, 0(a0)
; SLOW-NEXT: sh a3, 2(a0)
-; SLOW-NEXT: j .LBB9_1
+; SLOW-NEXT: andi a1, a1, 2
+; SLOW-NEXT: beqz a1, .LBB9_2
; SLOW-NEXT: .LBB9_4: # %cond.store1
; SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; SLOW-NEXT: vslidedown.vi v8, v8, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index 9498446f5982b..0640a6f3af257 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -135,13 +135,13 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) {
; CHECK-NOV-NEXT: addiw a6, a3, -1
; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz
; CHECK-NOV-NEXT: bge a1, a6, .LBB3_10
-; CHECK-NOV-NEXT: .LBB3_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a4, fa1, rtz
; CHECK-NOV-NEXT: bge a2, a6, .LBB3_11
; CHECK-NOV-NEXT: .LBB3_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
; CHECK-NOV-NEXT: bge a4, a6, .LBB3_12
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB3_3: # %entry
; CHECK-NOV-NEXT: bge a5, a6, .LBB3_13
; CHECK-NOV-NEXT: .LBB3_4: # %entry
; CHECK-NOV-NEXT: bge a3, a5, .LBB3_14
@@ -161,10 +161,12 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB3_10: # %entry
; CHECK-NOV-NEXT: mv a1, a6
-; CHECK-NOV-NEXT: j .LBB3_1
+; CHECK-NOV-NEXT: fcvt.l.s a4, fa1, rtz
+; CHECK-NOV-NEXT: blt a2, a6, .LBB3_2
; CHECK-NOV-NEXT: .LBB3_11: # %entry
; CHECK-NOV-NEXT: mv a2, a6
-; CHECK-NOV-NEXT: j .LBB3_2
+; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
+; CHECK-NOV-NEXT: blt a4, a6, .LBB3_3
; CHECK-NOV-NEXT: .LBB3_12: # %entry
; CHECK-NOV-NEXT: mv a4, a6
; CHECK-NOV-NEXT: blt a5, a6, .LBB3_4
@@ -206,13 +208,13 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) {
; CHECK-NOV-NEXT: srli a3, a3, 32
; CHECK-NOV-NEXT: fcvt.lu.s a2, fa1, rtz
; CHECK-NOV-NEXT: bgeu a1, a3, .LBB4_6
-; CHECK-NOV-NEXT: .LBB4_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a4, fa2, rtz
; CHECK-NOV-NEXT: bgeu a2, a3, .LBB4_7
; CHECK-NOV-NEXT: .LBB4_2: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a5, fa3, rtz
; CHECK-NOV-NEXT: bgeu a4, a3, .LBB4_8
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB4_3: # %entry
; CHECK-NOV-NEXT: bltu a5, a3, .LBB4_5
; CHECK-NOV-NEXT: .LBB4_4: # %entry
; CHECK-NOV-NEXT: mv a5, a3
@@ -224,10 +226,12 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB4_6: # %entry
; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: j .LBB4_1
+; CHECK-NOV-NEXT: fcvt.lu.s a4, fa2, rtz
+; CHECK-NOV-NEXT: bltu a2, a3, .LBB4_2
; CHECK-NOV-NEXT: .LBB4_7: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: j .LBB4_2
+; CHECK-NOV-NEXT: fcvt.lu.s a5, fa3, rtz
+; CHECK-NOV-NEXT: bltu a4, a3, .LBB4_3
; CHECK-NOV-NEXT: .LBB4_8: # %entry
; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: bgeu a5, a3, .LBB4_4
@@ -255,13 +259,13 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) {
; CHECK-NOV-NEXT: srli a4, a4, 32
; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz
; CHECK-NOV-NEXT: bge a1, a4, .LBB5_6
-; CHECK-NOV-NEXT: .LBB5_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz
; CHECK-NOV-NEXT: bge a2, a4, .LBB5_7
; CHECK-NOV-NEXT: .LBB5_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
; CHECK-NOV-NEXT: bge a3, a4, .LBB5_8
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB5_3: # %entry
; CHECK-NOV-NEXT: blt a5, a4, .LBB5_5
; CHECK-NOV-NEXT: .LBB5_4: # %entry
; CHECK-NOV-NEXT: mv a5, a4
@@ -285,10 +289,12 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB5_6: # %entry
; CHECK-NOV-NEXT: mv a1, a4
-; CHECK-NOV-NEXT: j .LBB5_1
+; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz
+; CHECK-NOV-NEXT: blt a2, a4, .LBB5_2
; CHECK-NOV-NEXT: .LBB5_7: # %entry
; CHECK-NOV-NEXT: mv a2, a4
-; CHECK-NOV-NEXT: j .LBB5_2
+; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
+; CHECK-NOV-NEXT: blt a3, a4, .LBB5_3
; CHECK-NOV-NEXT: .LBB5_8: # %entry
; CHECK-NOV-NEXT: mv a3, a4
; CHECK-NOV-NEXT: bge a5, a4, .LBB5_4
@@ -356,13 +362,13 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: lui a1, 524288
; CHECK-NOV-NEXT: addiw a4, a1, -1
; CHECK-NOV-NEXT: bge a0, a4, .LBB6_10
-; CHECK-NOV-NEXT: .LBB6_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a2, fs1, rtz
; CHECK-NOV-NEXT: bge s1, a4, .LBB6_11
; CHECK-NOV-NEXT: .LBB6_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
; CHECK-NOV-NEXT: bge a2, a4, .LBB6_12
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB6_3: # %entry
; CHECK-NOV-NEXT: bge a3, a4, .LBB6_13
; CHECK-NOV-NEXT: .LBB6_4: # %entry
; CHECK-NOV-NEXT: bge a1, a3, .LBB6_14
@@ -401,10 +407,12 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB6_10: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a4
-; CHECK-NOV-NEXT: j .LBB6_1
+; CHECK-NOV-NEXT: fcvt.l.s a2, fs1, rtz
+; CHECK-NOV-NEXT: blt s1, a4, .LBB6_2
; CHECK-NOV-NEXT: .LBB6_11: # %entry
; CHECK-NOV-NEXT: mv s1, a4
-; CHECK-NOV-NEXT: j .LBB6_2
+; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
+; CHECK-NOV-NEXT: blt a2, a4, .LBB6_3
; CHECK-NOV-NEXT: .LBB6_12: # %entry
; CHECK-NOV-NEXT: mv a2, a4
; CHECK-NOV-NEXT: blt a3, a4, .LBB6_4
@@ -554,13 +562,13 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: li a1, -1
; CHECK-NOV-NEXT: srli a1, a1, 32
; CHECK-NOV-NEXT: bgeu a0, a1, .LBB7_6
-; CHECK-NOV-NEXT: .LBB7_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a2, fs1, rtz
; CHECK-NOV-NEXT: bgeu s1, a1, .LBB7_7
; CHECK-NOV-NEXT: .LBB7_2: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a3, fs0, rtz
; CHECK-NOV-NEXT: bgeu a2, a1, .LBB7_8
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB7_3: # %entry
; CHECK-NOV-NEXT: bltu a3, a1, .LBB7_5
; CHECK-NOV-NEXT: .LBB7_4: # %entry
; CHECK-NOV-NEXT: mv a3, a1
@@ -591,10 +599,12 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB7_6: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a1
-; CHECK-NOV-NEXT: j .LBB7_1
+; CHECK-NOV-NEXT: fcvt.lu.s a2, fs1, rtz
+; CHECK-NOV-NEXT: bltu s1, a1, .LBB7_2
; CHECK-NOV-NEXT: .LBB7_7: # %entry
; CHECK-NOV-NEXT: mv s1, a1
-; CHECK-NOV-NEXT: j .LBB7_2
+; CHECK-NOV-NEXT: fcvt.lu.s a3, fs0, rtz
+; CHECK-NOV-NEXT: bltu a2, a1, .LBB7_3
; CHECK-NOV-NEXT: .LBB7_8: # %entry
; CHECK-NOV-NEXT: mv a2, a1
; CHECK-NOV-NEXT: bgeu a3, a1, .LBB7_4
@@ -730,13 +740,13 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: li a2, -1
; CHECK-NOV-NEXT: srli a2, a2, 32
; CHECK-NOV-NEXT: bge a0, a2, .LBB8_6
-; CHECK-NOV-NEXT: .LBB8_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz
; CHECK-NOV-NEXT: bge s1, a2, .LBB8_7
; CHECK-NOV-NEXT: .LBB8_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
; CHECK-NOV-NEXT: bge a1, a2, .LBB8_8
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB8_3: # %entry
; CHECK-NOV-NEXT: blt a3, a2, .LBB8_5
; CHECK-NOV-NEXT: .LBB8_4: # %entry
; CHECK-NOV-NEXT: mv a3, a2
@@ -779,10 +789,12 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB8_6: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a2
-; CHECK-NOV-NEXT: j .LBB8_1
+; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz
+; CHECK-NOV-NEXT: blt s1, a2, .LBB8_2
; CHECK-NOV-NEXT: .LBB8_7: # %entry
; CHECK-NOV-NEXT: mv s1, a2
-; CHECK-NOV-NEXT: j .LBB8_2
+; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
+; CHECK-NOV-NEXT: blt a1, a2, .LBB8_3
; CHECK-NOV-NEXT: .LBB8_8: # %entry
; CHECK-NOV-NEXT: mv a1, a2
; CHECK-NOV-NEXT: bge a3, a2, .LBB8_4
@@ -893,7 +905,7 @@ define <2 x i16> @stest_f64i16(<2 x double> %x) {
; CHECK-NOV-NEXT: .LBB9_2: # %entry
; CHECK-NOV-NEXT: lui a2, 1048568
; CHECK-NOV-NEXT: bge a2, a0, .LBB9_7
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB9_3: # %entry
; CHECK-NOV-NEXT: bge a2, a1, .LBB9_8
; CHECK-NOV-NEXT: .LBB9_4: # %entry
; CHECK-NOV-NEXT: ret
@@ -902,7 +914,8 @@ define <2 x i16> @stest_f64i16(<2 x double> %x) {
; CHECK-NOV-NEXT: blt a0, a2, .LBB9_2
; CHECK-NOV-NEXT: .LBB9_6: # %entry
; CHECK-NOV-NEXT: mv a0, a2
-; CHECK-NOV-NEXT: j .LBB9_2
+; CHECK-NOV-NEXT: lui a2, 1048568
+; CHECK-NOV-NEXT: blt a2, a0, .LBB9_3
; CHECK-NOV-NEXT: .LBB9_7: # %entry
; CHECK-NOV-NEXT: lui a0, 1048568
; CHECK-NOV-NEXT: blt a2, a1, .LBB9_4
@@ -1010,18 +1023,18 @@ define <4 x i16> @stest_f32i16(<4 x float> %x) {
; CHECK-NOV-NEXT: addiw a5, a5, -1
; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz
; CHECK-NOV-NEXT: bge a1, a5, .LBB12_10
-; CHECK-NOV-NEXT: .LBB12_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz
; CHECK-NOV-NEXT: bge a2, a5, .LBB12_11
; CHECK-NOV-NEXT: .LBB12_2: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz
; CHECK-NOV-NEXT: bge a3, a5, .LBB12_12
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB12_3: # %entry
; CHECK-NOV-NEXT: bge a4, a5, .LBB12_13
; CHECK-NOV-NEXT: .LBB12_4: # %entry
; CHECK-NOV-NEXT: lui a5, 1048568
; CHECK-NOV-NEXT: bge a5, a4, .LBB12_14
-; CHECK-NOV-NEXT: # %bb.5: # %entry
+; CHECK-NOV-NEXT: .LBB12_5: # %entry
; CHECK-NOV-NEXT: bge a5, a3, .LBB12_15
; CHECK-NOV-NEXT: .LBB12_6: # %entry
; CHECK-NOV-NEXT: bge a5, a2, .LBB12_16
@@ -1037,16 +1050,19 @@ define <4 x i16> @stest_f32i16(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB12_10: # %entry
; CHECK-NOV-NEXT: mv a1, a5
-; CHECK-NOV-NEXT: j .LBB12_1
+; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz
+; CHECK-NOV-NEXT: blt a2, a5, .LBB12_2
; CHECK-NOV-NEXT: .LBB12_11: # %entry
; CHECK-NOV-NEXT: mv a2, a5
-; CHECK-NOV-NEXT: j .LBB12_2
+; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz
+; CHECK-NOV-NEXT: blt a3, a5, .LBB12_3
; CHECK-NOV-NEXT: .LBB12_12: # %entry
; CHECK-NOV-NEXT: mv a3, a5
; CHECK-NOV-NEXT: blt a4, a5, .LBB12_4
; CHECK-NOV-NEXT: .LBB12_13: # %entry
; CHECK-NOV-NEXT: mv a4, a5
-; CHECK-NOV-NEXT: j .LBB12_4
+; CHECK-NOV-NEXT: lui a5, 1048568
+; CHECK-NOV-NEXT: blt a5, a4, .LBB12_5
; CHECK-NOV-NEXT: .LBB12_14: # %entry
; CHECK-NOV-NEXT: lui a4, 1048568
; CHECK-NOV-NEXT: blt a5, a3, .LBB12_6
@@ -1083,13 +1099,13 @@ define <4 x i16> @utest_f32i16(<4 x float> %x) {
; CHECK-NOV-NEXT: addiw a3, a3, -1
; CHECK-NOV-NEXT: fcvt.wu.s a2, fa1, rtz
; CHECK-NOV-NEXT: bgeu a1, a3, .LBB13_6
-; CHECK-NOV-NEXT: .LBB13_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.wu.s a4, fa2, rtz
; CHECK-NOV-NEXT: bgeu a2, a3, .LBB13_7
; CHECK-NOV-NEXT: .LBB13_2: # %entry
; CHECK-NOV-NEXT: fcvt.wu.s a5, fa3, rtz
; CHECK-NOV-NEXT: bgeu a4, a3, .LBB13_8
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB13_3: # %entry
; CHECK-NOV-NEXT: bltu a5, a3, .LBB13_5
; CHECK-NOV-NEXT: .LBB13_4: # %entry
; CHECK-NOV-NEXT: mv a5, a3
@@ -1101,10 +1117,12 @@ define <4 x i16> @utest_f32i16(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB13_6: # %entry
; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: j .LBB13_1
+; CHECK-NOV-NEXT: fcvt.wu.s a4, fa2, rtz
+; CHECK-NOV-NEXT: bltu a2, a3, .LBB13_2
; CHECK-NOV-NEXT: .LBB13_7: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: j .LBB13_2
+; CHECK-NOV-NEXT: fcvt.wu.s a5, fa3, rtz
+; CHECK-NOV-NEXT: bltu a4, a3, .LBB13_3
; CHECK-NOV-NEXT: .LBB13_8: # %entry
; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: bgeu a5, a3, .LBB13_4
@@ -1133,13 +1151,13 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) {
; CHECK-NOV-NEXT: addiw a4, a4, -1
; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz
; CHECK-NOV-NEXT: bge a1, a4, .LBB14_6
-; CHECK-NOV-NEXT: .LBB14_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz
; CHECK-NOV-NEXT: bge a2, a4, .LBB14_7
; CHECK-NOV-NEXT: .LBB14_2: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz
; CHECK-NOV-NEXT: bge a3, a4, .LBB14_8
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB14_3: # %entry
; CHECK-NOV-NEXT: blt a5, a4, .LBB14_5
; CHECK-NOV-NEXT: .LBB14_4: # %entry
; CHECK-NOV-NEXT: mv a5, a4
@@ -1163,10 +1181,12 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB14_6: # %entry
; CHECK-NOV-NEXT: mv a1, a4
-; CHECK-NOV-NEXT: j .LBB14_1
+; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz
+; CHECK-NOV-NEXT: blt a2, a4, .LBB14_2
; CHECK-NOV-NEXT: .LBB14_7: # %entry
; CHECK-NOV-NEXT: mv a2, a4
-; CHECK-NOV-NEXT: j .LBB14_2
+; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz
+; CHECK-NOV-NEXT: blt a3, a4, .LBB14_3
; CHECK-NOV-NEXT: .LBB14_8: # %entry
; CHECK-NOV-NEXT: mv a3, a4
; CHECK-NOV-NEXT: bge a5, a4, .LBB14_4
@@ -1265,7 +1285,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: lui a7, 8
; CHECK-NOV-NEXT: addiw a7, a7, -1
; CHECK-NOV-NEXT: bge a0, a7, .LBB15_18
-; CHECK-NOV-NEXT: .LBB15_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
; CHECK-NOV-NEXT: bge s1, a7, .LBB15_19
; CHECK-NOV-NEXT: .LBB15_2: # %entry
@@ -1283,12 +1303,12 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB15_6: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a6, fs0, rtz
; CHECK-NOV-NEXT: bge a5, a7, .LBB15_24
-; CHECK-NOV-NEXT: # %bb.7: # %entry
+; CHECK-NOV-NEXT: .LBB15_7: # %entry
; CHECK-NOV-NEXT: bge a6, a7, .LBB15_25
; CHECK-NOV-NEXT: .LBB15_8: # %entry
; CHECK-NOV-NEXT: lui a7, 1048568
; CHECK-NOV-NEXT: bge a7, a6, .LBB15_26
-; CHECK-NOV-NEXT: # %bb.9: # %entry
+; CHECK-NOV-NEXT: .LBB15_9: # %entry
; CHECK-NOV-NEXT: bge a7, a5, .LBB15_27
; CHECK-NOV-NEXT: .LBB15_10: # %entry
; CHECK-NOV-NEXT: bge a7, a4, .LBB15_28
@@ -1351,28 +1371,35 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB15_18: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a7
-; CHECK-NOV-NEXT: j .LBB15_1
+; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
+; CHECK-NOV-NEXT: blt s1, a7, .LBB15_2
; CHECK-NOV-NEXT: .LBB15_19: # %entry
; CHECK-NOV-NEXT: mv s1, a7
-; CHECK-NOV-NEXT: j .LBB15_2
+; CHECK-NOV-NEXT: fcvt.l.s a3, fs4, rtz
+; CHECK-NOV-NEXT: blt a1, a7, .LBB15_3
; CHECK-NOV-NEXT: .LBB15_20: # %entry
; CHECK-NOV-NEXT: mv a1, a7
-; CHECK-NOV-NEXT: j .LBB15_3
+; CHECK-NOV-NEXT: fcvt.l.s a2, fs3, rtz
+; CHECK-NOV-NEXT: blt a3, a7, .LBB15_4
; CHECK-NOV-NEXT: .LBB15_21: # %entry
; CHECK-NOV-NEXT: mv a3, a7
-; CHECK-NOV-NEXT: j .LBB15_4
+; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz
+; CHECK-NOV-NEXT: blt a2, a7, .LBB15_5
; CHECK-NOV-NEXT: .LBB15_22: # %entry
; CHECK-NOV-NEXT: mv a2, a7
-; CHECK-NOV-NEXT: j .LBB15_5
+; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz
+; CHECK-NOV-NEXT: blt a4, a7, .LBB15_6
; CHECK-NOV-NEXT: .LBB15_23: # %entry
; CHECK-NOV-NEXT: mv a4, a7
-; CHECK-NOV-NEXT: j .LBB15_6
+; CHECK-NOV-NEXT: fcvt.l.s a6, fs0, rtz
+; CHECK-NOV-NEXT: blt a5, a7, .LBB15_7
; CHECK-NOV-NEXT: .LBB15_24: # %entry
; CHECK-NOV-NEXT: mv a5, a7
; CHECK-NOV-NEXT: blt a6, a7, .LBB15_8
; CHECK-NOV-NEXT: .LBB15_25: # %entry
; CHECK-NOV-NEXT: mv a6, a7
-; CHECK-NOV-NEXT: j .LBB15_8
+; CHECK-NOV-NEXT: lui a7, 1048568
+; CHECK-NOV-NEXT: blt a7, a6, .LBB15_9
; CHECK-NOV-NEXT: .LBB15_26: # %entry
; CHECK-NOV-NEXT: lui a6, 1048568
; CHECK-NOV-NEXT: blt a7, a5, .LBB15_10
@@ -1642,7 +1669,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: lui a3, 16
; CHECK-NOV-NEXT: addiw a3, a3, -1
; CHECK-NOV-NEXT: bgeu a0, a3, .LBB16_10
-; CHECK-NOV-NEXT: .LBB16_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a1, fs5, rtz
; CHECK-NOV-NEXT: bgeu s1, a3, .LBB16_11
; CHECK-NOV-NEXT: .LBB16_2: # %entry
@@ -1660,7 +1687,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB16_6: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a7, fs0, rtz
; CHECK-NOV-NEXT: bgeu a6, a3, .LBB16_16
-; CHECK-NOV-NEXT: # %bb.7: # %entry
+; CHECK-NOV-NEXT: .LBB16_7: # %entry
; CHECK-NOV-NEXT: bltu a7, a3, .LBB16_9
; CHECK-NOV-NEXT: .LBB16_8: # %entry
; CHECK-NOV-NEXT: mv a7, a3
@@ -1711,22 +1738,28 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB16_10: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a3
-; CHECK-NOV-NEXT: j .LBB16_1
+; CHECK-NOV-NEXT: fcvt.lu.s a1, fs5, rtz
+; CHECK-NOV-NEXT: bltu s1, a3, .LBB16_2
; CHECK-NOV-NEXT: .LBB16_11: # %entry
; CHECK-NOV-NEXT: mv s1, a3
-; CHECK-NOV-NEXT: j .LBB16_2
+; CHECK-NOV-NEXT: fcvt.lu.s a2, fs4, rtz
+; CHECK-NOV-NEXT: bltu a1, a3, .LBB16_3
; CHECK-NOV-NEXT: .LBB16_12: # %entry
; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: j .LBB16_3
+; CHECK-NOV-NEXT: fcvt.lu.s a4, fs3, rtz
+; CHECK-NOV-NEXT: bltu a2, a3, .LBB16_4
; CHECK-NOV-NEXT: .LBB16_13: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: j .LBB16_4
+; CHECK-NOV-NEXT: fcvt.lu.s a5, fs2, rtz
+; CHECK-NOV-NEXT: bltu a4, a3, .LBB16_5
; CHECK-NOV-NEXT: .LBB16_14: # %entry
; CHECK-NOV-NEXT: mv a4, a3
-; CHECK-NOV-NEXT: j .LBB16_5
+; CHECK-NOV-NEXT: fcvt.lu.s a6, fs1, rtz
+; CHECK-NOV-NEXT: bltu a5, a3, .LBB16_6
; CHECK-NOV-NEXT: .LBB16_15: # %entry
; CHECK-NOV-NEXT: mv a5, a3
-; CHECK-NOV-NEXT: j .LBB16_6
+; CHECK-NOV-NEXT: fcvt.lu.s a7, fs0, rtz
+; CHECK-NOV-NEXT: bltu a6, a3, .LBB16_7
; CHECK-NOV-NEXT: .LBB16_16: # %entry
; CHECK-NOV-NEXT: mv a6, a3
; CHECK-NOV-NEXT: bgeu a7, a3, .LBB16_8
@@ -1976,7 +2009,7 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: lui a4, 16
; CHECK-NOV-NEXT: addiw a4, a4, -1
; CHECK-NOV-NEXT: bge a0, a4, .LBB17_10
-; CHECK-NOV-NEXT: .LBB17_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
; CHECK-NOV-NEXT: bge s1, a4, .LBB17_11
; CHECK-NOV-NEXT: .LBB17_2: # %entry
@@ -1994,7 +2027,7 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB17_6: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a7, fs0, rtz
; CHECK-NOV-NEXT: bge a6, a4, .LBB17_16
-; CHECK-NOV-NEXT: # %bb.7: # %entry
+; CHECK-NOV-NEXT: .LBB17_7: # %entry
; CHECK-NOV-NEXT: blt a7, a4, .LBB17_9
; CHECK-NOV-NEXT: .LBB17_8: # %entry
; CHECK-NOV-NEXT: mv a7, a4
@@ -2069,22 +2102,28 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB17_10: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a4
-; CHECK-NOV-NEXT: j .LBB17_1
+; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
+; CHECK-NOV-NEXT: blt s1, a4, .LBB17_2
; CHECK-NOV-NEXT: .LBB17_11: # %entry
; CHECK-NOV-NEXT: mv s1, a4
-; CHECK-NOV-NEXT: j .LBB17_2
+; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
+; CHECK-NOV-NEXT: blt a1, a4, .LBB17_3
; CHECK-NOV-NEXT: .LBB17_12: # %entry
; CHECK-NOV-NEXT: mv a1, a4
-; CHECK-NOV-NEXT: j .LBB17_3
+; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz
+; CHECK-NOV-NEXT: blt a2, a4, .LBB17_4
; CHECK-NOV-NEXT: .LBB17_13: # %entry
; CHECK-NOV-NEXT: mv a2, a4
-; CHECK-NOV-NEXT: j .LBB17_4
+; CHECK-NOV-NEXT: fcvt.l.s a5, fs2, rtz
+; CHECK-NOV-NEXT: blt a3, a4, .LBB17_5
; CHECK-NOV-NEXT: .LBB17_14: # %entry
; CHECK-NOV-NEXT: mv a3, a4
-; CHECK-NOV-NEXT: j .LBB17_5
+; CHECK-NOV-NEXT: fcvt.l.s a6, fs1, rtz
+; CHECK-NOV-NEXT: blt a5, a4, .LBB17_6
; CHECK-NOV-NEXT: .LBB17_15: # %entry
; CHECK-NOV-NEXT: mv a5, a4
-; CHECK-NOV-NEXT: j .LBB17_6
+; CHECK-NOV-NEXT: fcvt.l.s a7, fs0, rtz
+; CHECK-NOV-NEXT: blt a6, a4, .LBB17_7
; CHECK-NOV-NEXT: .LBB17_16: # %entry
; CHECK-NOV-NEXT: mv a6, a4
; CHECK-NOV-NEXT: bge a7, a4, .LBB17_8
@@ -2573,21 +2612,21 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
; CHECK-NOV-NEXT: neg a4, a4
; CHECK-NOV-NEXT: neg a3, a3
; CHECK-NOV-NEXT: and a3, a3, a0
-; CHECK-NOV-NEXT: beqz a1, .LBB20_8
+; CHECK-NOV-NEXT: beqz a1, .LBB20_7
; CHECK-NOV-NEXT: # %bb.5: # %entry
; CHECK-NOV-NEXT: sgtz a0, a1
-; CHECK-NOV-NEXT: .LBB20_6: # %entry
; CHECK-NOV-NEXT: and a1, a4, s0
-; CHECK-NOV-NEXT: bnez a2, .LBB20_9
-; CHECK-NOV-NEXT: # %bb.7:
+; CHECK-NOV-NEXT: bnez a2, .LBB20_8
+; CHECK-NOV-NEXT: .LBB20_6:
; CHECK-NOV-NEXT: snez a2, a1
-; CHECK-NOV-NEXT: j .LBB20_10
-; CHECK-NOV-NEXT: .LBB20_8:
+; CHECK-NOV-NEXT: j .LBB20_9
+; CHECK-NOV-NEXT: .LBB20_7:
; CHECK-NOV-NEXT: snez a0, a3
-; CHECK-NOV-NEXT: j .LBB20_6
-; CHECK-NOV-NEXT: .LBB20_9: # %entry
+; CHECK-NOV-NEXT: and a1, a4, s0
+; CHECK-NOV-NEXT: beqz a2, .LBB20_6
+; CHECK-NOV-NEXT: .LBB20_8: # %entry
; CHECK-NOV-NEXT: sgtz a2, a2
-; CHECK-NOV-NEXT: .LBB20_10: # %entry
+; CHECK-NOV-NEXT: .LBB20_9: # %entry
; CHECK-NOV-NEXT: neg a2, a2
; CHECK-NOV-NEXT: neg a4, a0
; CHECK-NOV-NEXT: and a0, a2, a1
@@ -2641,21 +2680,21 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
; CHECK-V-NEXT: neg a3, a3
; CHECK-V-NEXT: neg a4, a4
; CHECK-V-NEXT: and a0, a4, a0
-; CHECK-V-NEXT: beqz a1, .LBB20_8
+; CHECK-V-NEXT: beqz a1, .LBB20_7
; CHECK-V-NEXT: # %bb.5: # %entry
; CHECK-V-NEXT: sgtz a1, a1
-; CHECK-V-NEXT: .LBB20_6: # %entry
; CHECK-V-NEXT: and a3, a3, s0
-; CHECK-V-NEXT: bnez a2, .LBB20_9
-; CHECK-V-NEXT: # %bb.7:
+; CHECK-V-NEXT: bnez a2, .LBB20_8
+; CHECK-V-NEXT: .LBB20_6:
; CHECK-V-NEXT: snez a2, a3
-; CHECK-V-NEXT: j .LBB20_10
-; CHECK-V-NEXT: .LBB20_8:
+; CHECK-V-NEXT: j .LBB20_9
+; CHECK-V-NEXT: .LBB20_7:
; CHECK-V-NEXT: snez a1, a0
-; CHECK-V-NEXT: j .LBB20_6
-; CHECK-V-NEXT: .LBB20_9: # %entry
+; CHECK-V-NEXT: and a3, a3, s0
+; CHECK-V-NEXT: beqz a2, .LBB20_6
+; CHECK-V-NEXT: .LBB20_8: # %entry
; CHECK-V-NEXT: sgtz a2, a2
-; CHECK-V-NEXT: .LBB20_10: # %entry
+; CHECK-V-NEXT: .LBB20_9: # %entry
; CHECK-V-NEXT: neg a2, a2
; CHECK-V-NEXT: neg a1, a1
; CHECK-V-NEXT: and a2, a2, a3
@@ -2995,21 +3034,21 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
; CHECK-NOV-NEXT: neg a4, a4
; CHECK-NOV-NEXT: neg a3, a3
; CHECK-NOV-NEXT: and a3, a3, a0
-; CHECK-NOV-NEXT: beqz a1, .LBB23_8
+; CHECK-NOV-NEXT: beqz a1, .LBB23_7
; CHECK-NOV-NEXT: # %bb.5: # %entry
; CHECK-NOV-NEXT: sgtz a0, a1
-; CHECK-NOV-NEXT: .LBB23_6: # %entry
; CHECK-NOV-NEXT: and a1, a4, s0
-; CHECK-NOV-NEXT: bnez a2, .LBB23_9
-; CHECK-NOV-NEXT: # %bb.7:
+; CHECK-NOV-NEXT: bnez a2, .LBB23_8
+; CHECK-NOV-NEXT: .LBB23_6:
; CHECK-NOV-NEXT: snez a2, a1
-; CHECK-NOV-NEXT: j .LBB23_10
-; CHECK-NOV-NEXT: .LBB23_8:
+; CHECK-NOV-NEXT: j .LBB23_9
+; CHECK-NOV-NEXT: .LBB23_7:
; CHECK-NOV-NEXT: snez a0, a3
-; CHECK-NOV-NEXT: j .LBB23_6
-; CHECK-NOV-NEXT: .LBB23_9: # %entry
+; CHECK-NOV-NEXT: and a1, a4, s0
+; CHECK-NOV-NEXT: beqz a2, .LBB23_6
+; CHECK-NOV-NEXT: .LBB23_8: # %entry
; CHECK-NOV-NEXT: sgtz a2, a2
-; CHECK-NOV-NEXT: .LBB23_10: # %entry
+; CHECK-NOV-NEXT: .LBB23_9: # %entry
; CHECK-NOV-NEXT: neg a2, a2
; CHECK-NOV-NEXT: neg a4, a0
; CHECK-NOV-NEXT: and a0, a2, a1
@@ -3063,21 +3102,21 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
; CHECK-V-NEXT: neg a3, a3
; CHECK-V-NEXT: neg a4, a4
; CHECK-V-NEXT: and a0, a4, a0
-; CHECK-V-NEXT: beqz a1, .LBB23_8
+; CHECK-V-NEXT: beqz a1, .LBB23_7
; CHECK-V-NEXT: # %bb.5: # %entry
; CHECK-V-NEXT: sgtz a1, a1
-; CHECK-V-NEXT: .LBB23_6: # %entry
; CHECK-V-NEXT: and a3, a3, s0
-; CHECK-V-NEXT: bnez a2, .LBB23_9
-; CHECK-V-NEXT: # %bb.7:
+; CHECK-V-NEXT: bnez a2, .LBB23_8
+; CHECK-V-NEXT: .LBB23_6:
; CHECK-V-NEXT: snez a2, a3
-; CHECK-V-NEXT: j .LBB23_10
-; CHECK-V-NEXT: .LBB23_8:
+; CHECK-V-NEXT: j .LBB23_9
+; CHECK-V-NEXT: .LBB23_7:
; CHECK-V-NEXT: snez a1, a0
-; CHECK-V-NEXT: j .LBB23_6
-; CHECK-V-NEXT: .LBB23_9: # %entry
+; CHECK-V-NEXT: and a3, a3, s0
+; CHECK-V-NEXT: beqz a2, .LBB23_6
+; CHECK-V-NEXT: .LBB23_8: # %entry
; CHECK-V-NEXT: sgtz a2, a2
-; CHECK-V-NEXT: .LBB23_10: # %entry
+; CHECK-V-NEXT: .LBB23_9: # %entry
; CHECK-V-NEXT: neg a2, a2
; CHECK-V-NEXT: neg a1, a1
; CHECK-V-NEXT: and a2, a2, a3
@@ -3420,21 +3459,21 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
; CHECK-NOV-NEXT: neg a4, a4
; CHECK-NOV-NEXT: neg a3, a3
; CHECK-NOV-NEXT: and a3, a3, a0
-; CHECK-NOV-NEXT: beqz a1, .LBB26_8
+; CHECK-NOV-NEXT: beqz a1, .LBB26_7
; CHECK-NOV-NEXT: # %bb.5: # %entry
; CHECK-NOV-NEXT: sgtz a0, a1
-; CHECK-NOV-NEXT: .LBB26_6: # %entry
; CHECK-NOV-NEXT: and a1, a4, s0
-; CHECK-NOV-NEXT: bnez a2, .LBB26_9
-; CHECK-NOV-NEXT: # %bb.7:
+; CHECK-NOV-NEXT: bnez a2, .LBB26_8
+; CHECK-NOV-NEXT: .LBB26_6:
; CHECK-NOV-NEXT: snez a2, a1
-; CHECK-NOV-NEXT: j .LBB26_10
-; CHECK-NOV-NEXT: .LBB26_8:
+; CHECK-NOV-NEXT: j .LBB26_9
+; CHECK-NOV-NEXT: .LBB26_7:
; CHECK-NOV-NEXT: snez a0, a3
-; CHECK-NOV-NEXT: j .LBB26_6
-; CHECK-NOV-NEXT: .LBB26_9: # %entry
+; CHECK-NOV-NEXT: and a1, a4, s0
+; CHECK-NOV-NEXT: beqz a2, .LBB26_6
+; CHECK-NOV-NEXT: .LBB26_8: # %entry
; CHECK-NOV-NEXT: sgtz a2, a2
-; CHECK-NOV-NEXT: .LBB26_10: # %entry
+; CHECK-NOV-NEXT: .LBB26_9: # %entry
; CHECK-NOV-NEXT: neg a2, a2
; CHECK-NOV-NEXT: neg a4, a0
; CHECK-NOV-NEXT: and a0, a2, a1
@@ -3486,21 +3525,21 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
; CHECK-V-NEXT: neg a3, a3
; CHECK-V-NEXT: neg a4, a4
; CHECK-V-NEXT: and a0, a4, a0
-; CHECK-V-NEXT: beqz a1, .LBB26_8
+; CHECK-V-NEXT: beqz a1, .LBB26_7
; CHECK-V-NEXT: # %bb.5: # %entry
; CHECK-V-NEXT: sgtz a1, a1
-; CHECK-V-NEXT: .LBB26_6: # %entry
; CHECK-V-NEXT: and a3, a3, s0
-; CHECK-V-NEXT: bnez a2, .LBB26_9
-; CHECK-V-NEXT: # %bb.7:
+; CHECK-V-NEXT: bnez a2, .LBB26_8
+; CHECK-V-NEXT: .LBB26_6:
; CHECK-V-NEXT: snez a2, a3
-; CHECK-V-NEXT: j .LBB26_10
-; CHECK-V-NEXT: .LBB26_8:
+; CHECK-V-NEXT: j .LBB26_9
+; CHECK-V-NEXT: .LBB26_7:
; CHECK-V-NEXT: snez a1, a0
-; CHECK-V-NEXT: j .LBB26_6
-; CHECK-V-NEXT: .LBB26_9: # %entry
+; CHECK-V-NEXT: and a3, a3, s0
+; CHECK-V-NEXT: beqz a2, .LBB26_6
+; CHECK-V-NEXT: .LBB26_8: # %entry
; CHECK-V-NEXT: sgtz a2, a2
-; CHECK-V-NEXT: .LBB26_10: # %entry
+; CHECK-V-NEXT: .LBB26_9: # %entry
; CHECK-V-NEXT: neg a2, a2
; CHECK-V-NEXT: neg a1, a1
; CHECK-V-NEXT: and a2, a2, a3
@@ -3658,13 +3697,13 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: addiw a6, a3, -1
; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz
; CHECK-NOV-NEXT: bge a1, a6, .LBB30_10
-; CHECK-NOV-NEXT: .LBB30_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a4, fa1, rtz
; CHECK-NOV-NEXT: bge a2, a6, .LBB30_11
; CHECK-NOV-NEXT: .LBB30_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
; CHECK-NOV-NEXT: bge a4, a6, .LBB30_12
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB30_3: # %entry
; CHECK-NOV-NEXT: bge a5, a6, .LBB30_13
; CHECK-NOV-NEXT: .LBB30_4: # %entry
; CHECK-NOV-NEXT: bge a3, a5, .LBB30_14
@@ -3684,10 +3723,12 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB30_10: # %entry
; CHECK-NOV-NEXT: mv a1, a6
-; CHECK-NOV-NEXT: j .LBB30_1
+; CHECK-NOV-NEXT: fcvt.l.s a4, fa1, rtz
+; CHECK-NOV-NEXT: blt a2, a6, .LBB30_2
; CHECK-NOV-NEXT: .LBB30_11: # %entry
; CHECK-NOV-NEXT: mv a2, a6
-; CHECK-NOV-NEXT: j .LBB30_2
+; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
+; CHECK-NOV-NEXT: blt a4, a6, .LBB30_3
; CHECK-NOV-NEXT: .LBB30_12: # %entry
; CHECK-NOV-NEXT: mv a4, a6
; CHECK-NOV-NEXT: blt a5, a6, .LBB30_4
@@ -3727,13 +3768,13 @@ define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: srli a3, a3, 32
; CHECK-NOV-NEXT: fcvt.lu.s a2, fa1, rtz
; CHECK-NOV-NEXT: bgeu a1, a3, .LBB31_6
-; CHECK-NOV-NEXT: .LBB31_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a4, fa2, rtz
; CHECK-NOV-NEXT: bgeu a2, a3, .LBB31_7
; CHECK-NOV-NEXT: .LBB31_2: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a5, fa3, rtz
; CHECK-NOV-NEXT: bgeu a4, a3, .LBB31_8
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB31_3: # %entry
; CHECK-NOV-NEXT: bltu a5, a3, .LBB31_5
; CHECK-NOV-NEXT: .LBB31_4: # %entry
; CHECK-NOV-NEXT: mv a5, a3
@@ -3745,10 +3786,12 @@ define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB31_6: # %entry
; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: j .LBB31_1
+; CHECK-NOV-NEXT: fcvt.lu.s a4, fa2, rtz
+; CHECK-NOV-NEXT: bltu a2, a3, .LBB31_2
; CHECK-NOV-NEXT: .LBB31_7: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: j .LBB31_2
+; CHECK-NOV-NEXT: fcvt.lu.s a5, fa3, rtz
+; CHECK-NOV-NEXT: bltu a4, a3, .LBB31_3
; CHECK-NOV-NEXT: .LBB31_8: # %entry
; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: bgeu a5, a3, .LBB31_4
@@ -3775,13 +3818,13 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: srli a3, a3, 32
; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz
; CHECK-NOV-NEXT: bge a1, a3, .LBB32_6
-; CHECK-NOV-NEXT: .LBB32_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a4, fa1, rtz
; CHECK-NOV-NEXT: bge a2, a3, .LBB32_7
; CHECK-NOV-NEXT: .LBB32_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
; CHECK-NOV-NEXT: bge a4, a3, .LBB32_8
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB32_3: # %entry
; CHECK-NOV-NEXT: blt a5, a3, .LBB32_5
; CHECK-NOV-NEXT: .LBB32_4: # %entry
; CHECK-NOV-NEXT: mv a5, a3
@@ -3805,10 +3848,12 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB32_6: # %entry
; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: j .LBB32_1
+; CHECK-NOV-NEXT: fcvt.l.s a4, fa1, rtz
+; CHECK-NOV-NEXT: blt a2, a3, .LBB32_2
; CHECK-NOV-NEXT: .LBB32_7: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: j .LBB32_2
+; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
+; CHECK-NOV-NEXT: blt a4, a3, .LBB32_3
; CHECK-NOV-NEXT: .LBB32_8: # %entry
; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: bge a5, a3, .LBB32_4
@@ -3874,13 +3919,13 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: lui a1, 524288
; CHECK-NOV-NEXT: addiw a4, a1, -1
; CHECK-NOV-NEXT: bge a0, a4, .LBB33_10
-; CHECK-NOV-NEXT: .LBB33_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a2, fs1, rtz
; CHECK-NOV-NEXT: bge s1, a4, .LBB33_11
; CHECK-NOV-NEXT: .LBB33_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
; CHECK-NOV-NEXT: bge a2, a4, .LBB33_12
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB33_3: # %entry
; CHECK-NOV-NEXT: bge a3, a4, .LBB33_13
; CHECK-NOV-NEXT: .LBB33_4: # %entry
; CHECK-NOV-NEXT: bge a1, a3, .LBB33_14
@@ -3919,10 +3964,12 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB33_10: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a4
-; CHECK-NOV-NEXT: j .LBB33_1
+; CHECK-NOV-NEXT: fcvt.l.s a2, fs1, rtz
+; CHECK-NOV-NEXT: blt s1, a4, .LBB33_2
; CHECK-NOV-NEXT: .LBB33_11: # %entry
; CHECK-NOV-NEXT: mv s1, a4
-; CHECK-NOV-NEXT: j .LBB33_2
+; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
+; CHECK-NOV-NEXT: blt a2, a4, .LBB33_3
; CHECK-NOV-NEXT: .LBB33_12: # %entry
; CHECK-NOV-NEXT: mv a2, a4
; CHECK-NOV-NEXT: blt a3, a4, .LBB33_4
@@ -4070,13 +4117,13 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: li a1, -1
; CHECK-NOV-NEXT: srli a1, a1, 32
; CHECK-NOV-NEXT: bgeu a0, a1, .LBB34_6
-; CHECK-NOV-NEXT: .LBB34_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a2, fs1, rtz
; CHECK-NOV-NEXT: bgeu s1, a1, .LBB34_7
; CHECK-NOV-NEXT: .LBB34_2: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a3, fs0, rtz
; CHECK-NOV-NEXT: bgeu a2, a1, .LBB34_8
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB34_3: # %entry
; CHECK-NOV-NEXT: bltu a3, a1, .LBB34_5
; CHECK-NOV-NEXT: .LBB34_4: # %entry
; CHECK-NOV-NEXT: mv a3, a1
@@ -4107,10 +4154,12 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB34_6: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a1
-; CHECK-NOV-NEXT: j .LBB34_1
+; CHECK-NOV-NEXT: fcvt.lu.s a2, fs1, rtz
+; CHECK-NOV-NEXT: bltu s1, a1, .LBB34_2
; CHECK-NOV-NEXT: .LBB34_7: # %entry
; CHECK-NOV-NEXT: mv s1, a1
-; CHECK-NOV-NEXT: j .LBB34_2
+; CHECK-NOV-NEXT: fcvt.lu.s a3, fs0, rtz
+; CHECK-NOV-NEXT: bltu a2, a1, .LBB34_3
; CHECK-NOV-NEXT: .LBB34_8: # %entry
; CHECK-NOV-NEXT: mv a2, a1
; CHECK-NOV-NEXT: bgeu a3, a1, .LBB34_4
@@ -4245,13 +4294,13 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: li a2, -1
; CHECK-NOV-NEXT: srli a2, a2, 32
; CHECK-NOV-NEXT: bge a0, a2, .LBB35_6
-; CHECK-NOV-NEXT: .LBB35_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz
; CHECK-NOV-NEXT: bge s1, a2, .LBB35_7
; CHECK-NOV-NEXT: .LBB35_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
; CHECK-NOV-NEXT: bge a1, a2, .LBB35_8
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB35_3: # %entry
; CHECK-NOV-NEXT: blt a3, a2, .LBB35_5
; CHECK-NOV-NEXT: .LBB35_4: # %entry
; CHECK-NOV-NEXT: mv a3, a2
@@ -4294,10 +4343,12 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB35_6: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a2
-; CHECK-NOV-NEXT: j .LBB35_1
+; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz
+; CHECK-NOV-NEXT: blt s1, a2, .LBB35_2
; CHECK-NOV-NEXT: .LBB35_7: # %entry
; CHECK-NOV-NEXT: mv s1, a2
-; CHECK-NOV-NEXT: j .LBB35_2
+; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
+; CHECK-NOV-NEXT: blt a1, a2, .LBB35_3
; CHECK-NOV-NEXT: .LBB35_8: # %entry
; CHECK-NOV-NEXT: mv a1, a2
; CHECK-NOV-NEXT: bge a3, a2, .LBB35_4
@@ -4406,7 +4457,7 @@ define <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
; CHECK-NOV-NEXT: .LBB36_2: # %entry
; CHECK-NOV-NEXT: lui a2, 1048568
; CHECK-NOV-NEXT: bge a2, a0, .LBB36_7
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB36_3: # %entry
; CHECK-NOV-NEXT: bge a2, a1, .LBB36_8
; CHECK-NOV-NEXT: .LBB36_4: # %entry
; CHECK-NOV-NEXT: ret
@@ -4415,7 +4466,8 @@ define <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
; CHECK-NOV-NEXT: blt a0, a2, .LBB36_2
; CHECK-NOV-NEXT: .LBB36_6: # %entry
; CHECK-NOV-NEXT: mv a0, a2
-; CHECK-NOV-NEXT: j .LBB36_2
+; CHECK-NOV-NEXT: lui a2, 1048568
+; CHECK-NOV-NEXT: blt a2, a0, .LBB36_3
; CHECK-NOV-NEXT: .LBB36_7: # %entry
; CHECK-NOV-NEXT: lui a0, 1048568
; CHECK-NOV-NEXT: blt a2, a1, .LBB36_4
@@ -4518,18 +4570,18 @@ define <4 x i16> @stest_f32i16_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: addiw a5, a5, -1
; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz
; CHECK-NOV-NEXT: bge a1, a5, .LBB39_10
-; CHECK-NOV-NEXT: .LBB39_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz
; CHECK-NOV-NEXT: bge a2, a5, .LBB39_11
; CHECK-NOV-NEXT: .LBB39_2: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz
; CHECK-NOV-NEXT: bge a3, a5, .LBB39_12
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB39_3: # %entry
; CHECK-NOV-NEXT: bge a4, a5, .LBB39_13
; CHECK-NOV-NEXT: .LBB39_4: # %entry
; CHECK-NOV-NEXT: lui a5, 1048568
; CHECK-NOV-NEXT: bge a5, a4, .LBB39_14
-; CHECK-NOV-NEXT: # %bb.5: # %entry
+; CHECK-NOV-NEXT: .LBB39_5: # %entry
; CHECK-NOV-NEXT: bge a5, a3, .LBB39_15
; CHECK-NOV-NEXT: .LBB39_6: # %entry
; CHECK-NOV-NEXT: bge a5, a2, .LBB39_16
@@ -4545,16 +4597,19 @@ define <4 x i16> @stest_f32i16_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB39_10: # %entry
; CHECK-NOV-NEXT: mv a1, a5
-; CHECK-NOV-NEXT: j .LBB39_1
+; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz
+; CHECK-NOV-NEXT: blt a2, a5, .LBB39_2
; CHECK-NOV-NEXT: .LBB39_11: # %entry
; CHECK-NOV-NEXT: mv a2, a5
-; CHECK-NOV-NEXT: j .LBB39_2
+; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz
+; CHECK-NOV-NEXT: blt a3, a5, .LBB39_3
; CHECK-NOV-NEXT: .LBB39_12: # %entry
; CHECK-NOV-NEXT: mv a3, a5
; CHECK-NOV-NEXT: blt a4, a5, .LBB39_4
; CHECK-NOV-NEXT: .LBB39_13: # %entry
; CHECK-NOV-NEXT: mv a4, a5
-; CHECK-NOV-NEXT: j .LBB39_4
+; CHECK-NOV-NEXT: lui a5, 1048568
+; CHECK-NOV-NEXT: blt a5, a4, .LBB39_5
; CHECK-NOV-NEXT: .LBB39_14: # %entry
; CHECK-NOV-NEXT: lui a4, 1048568
; CHECK-NOV-NEXT: blt a5, a3, .LBB39_6
@@ -4589,13 +4644,13 @@ define <4 x i16> @utest_f32i16_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: addiw a3, a3, -1
; CHECK-NOV-NEXT: fcvt.wu.s a2, fa1, rtz
; CHECK-NOV-NEXT: bgeu a1, a3, .LBB40_6
-; CHECK-NOV-NEXT: .LBB40_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.wu.s a4, fa2, rtz
; CHECK-NOV-NEXT: bgeu a2, a3, .LBB40_7
; CHECK-NOV-NEXT: .LBB40_2: # %entry
; CHECK-NOV-NEXT: fcvt.wu.s a5, fa3, rtz
; CHECK-NOV-NEXT: bgeu a4, a3, .LBB40_8
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB40_3: # %entry
; CHECK-NOV-NEXT: bltu a5, a3, .LBB40_5
; CHECK-NOV-NEXT: .LBB40_4: # %entry
; CHECK-NOV-NEXT: mv a5, a3
@@ -4607,10 +4662,12 @@ define <4 x i16> @utest_f32i16_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB40_6: # %entry
; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: j .LBB40_1
+; CHECK-NOV-NEXT: fcvt.wu.s a4, fa2, rtz
+; CHECK-NOV-NEXT: bltu a2, a3, .LBB40_2
; CHECK-NOV-NEXT: .LBB40_7: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: j .LBB40_2
+; CHECK-NOV-NEXT: fcvt.wu.s a5, fa3, rtz
+; CHECK-NOV-NEXT: bltu a4, a3, .LBB40_3
; CHECK-NOV-NEXT: .LBB40_8: # %entry
; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: bgeu a5, a3, .LBB40_4
@@ -4638,13 +4695,13 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: addiw a3, a3, -1
; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz
; CHECK-NOV-NEXT: bge a1, a3, .LBB41_6
-; CHECK-NOV-NEXT: .LBB41_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a4, fa1, rtz
; CHECK-NOV-NEXT: bge a2, a3, .LBB41_7
; CHECK-NOV-NEXT: .LBB41_2: # %entry
; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz
; CHECK-NOV-NEXT: bge a4, a3, .LBB41_8
-; CHECK-NOV-NEXT: # %bb.3: # %entry
+; CHECK-NOV-NEXT: .LBB41_3: # %entry
; CHECK-NOV-NEXT: blt a5, a3, .LBB41_5
; CHECK-NOV-NEXT: .LBB41_4: # %entry
; CHECK-NOV-NEXT: mv a5, a3
@@ -4668,10 +4725,12 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB41_6: # %entry
; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: j .LBB41_1
+; CHECK-NOV-NEXT: fcvt.w.s a4, fa1, rtz
+; CHECK-NOV-NEXT: blt a2, a3, .LBB41_2
; CHECK-NOV-NEXT: .LBB41_7: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: j .LBB41_2
+; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz
+; CHECK-NOV-NEXT: blt a4, a3, .LBB41_3
; CHECK-NOV-NEXT: .LBB41_8: # %entry
; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: bge a5, a3, .LBB41_4
@@ -4768,7 +4827,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: lui a7, 8
; CHECK-NOV-NEXT: addiw a7, a7, -1
; CHECK-NOV-NEXT: bge a0, a7, .LBB42_18
-; CHECK-NOV-NEXT: .LBB42_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
; CHECK-NOV-NEXT: bge s1, a7, .LBB42_19
; CHECK-NOV-NEXT: .LBB42_2: # %entry
@@ -4786,12 +4845,12 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB42_6: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a6, fs0, rtz
; CHECK-NOV-NEXT: bge a5, a7, .LBB42_24
-; CHECK-NOV-NEXT: # %bb.7: # %entry
+; CHECK-NOV-NEXT: .LBB42_7: # %entry
; CHECK-NOV-NEXT: bge a6, a7, .LBB42_25
; CHECK-NOV-NEXT: .LBB42_8: # %entry
; CHECK-NOV-NEXT: lui a7, 1048568
; CHECK-NOV-NEXT: bge a7, a6, .LBB42_26
-; CHECK-NOV-NEXT: # %bb.9: # %entry
+; CHECK-NOV-NEXT: .LBB42_9: # %entry
; CHECK-NOV-NEXT: bge a7, a5, .LBB42_27
; CHECK-NOV-NEXT: .LBB42_10: # %entry
; CHECK-NOV-NEXT: bge a7, a4, .LBB42_28
@@ -4854,28 +4913,35 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB42_18: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a7
-; CHECK-NOV-NEXT: j .LBB42_1
+; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
+; CHECK-NOV-NEXT: blt s1, a7, .LBB42_2
; CHECK-NOV-NEXT: .LBB42_19: # %entry
; CHECK-NOV-NEXT: mv s1, a7
-; CHECK-NOV-NEXT: j .LBB42_2
+; CHECK-NOV-NEXT: fcvt.l.s a3, fs4, rtz
+; CHECK-NOV-NEXT: blt a1, a7, .LBB42_3
; CHECK-NOV-NEXT: .LBB42_20: # %entry
; CHECK-NOV-NEXT: mv a1, a7
-; CHECK-NOV-NEXT: j .LBB42_3
+; CHECK-NOV-NEXT: fcvt.l.s a2, fs3, rtz
+; CHECK-NOV-NEXT: blt a3, a7, .LBB42_4
; CHECK-NOV-NEXT: .LBB42_21: # %entry
; CHECK-NOV-NEXT: mv a3, a7
-; CHECK-NOV-NEXT: j .LBB42_4
+; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz
+; CHECK-NOV-NEXT: blt a2, a7, .LBB42_5
; CHECK-NOV-NEXT: .LBB42_22: # %entry
; CHECK-NOV-NEXT: mv a2, a7
-; CHECK-NOV-NEXT: j .LBB42_5
+; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz
+; CHECK-NOV-NEXT: blt a4, a7, .LBB42_6
; CHECK-NOV-NEXT: .LBB42_23: # %entry
; CHECK-NOV-NEXT: mv a4, a7
-; CHECK-NOV-NEXT: j .LBB42_6
+; CHECK-NOV-NEXT: fcvt.l.s a6, fs0, rtz
+; CHECK-NOV-NEXT: blt a5, a7, .LBB42_7
; CHECK-NOV-NEXT: .LBB42_24: # %entry
; CHECK-NOV-NEXT: mv a5, a7
; CHECK-NOV-NEXT: blt a6, a7, .LBB42_8
; CHECK-NOV-NEXT: .LBB42_25: # %entry
; CHECK-NOV-NEXT: mv a6, a7
-; CHECK-NOV-NEXT: j .LBB42_8
+; CHECK-NOV-NEXT: lui a7, 1048568
+; CHECK-NOV-NEXT: blt a7, a6, .LBB42_9
; CHECK-NOV-NEXT: .LBB42_26: # %entry
; CHECK-NOV-NEXT: lui a6, 1048568
; CHECK-NOV-NEXT: blt a7, a5, .LBB42_10
@@ -5143,7 +5209,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: lui a3, 16
; CHECK-NOV-NEXT: addiw a3, a3, -1
; CHECK-NOV-NEXT: bgeu a0, a3, .LBB43_10
-; CHECK-NOV-NEXT: .LBB43_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a1, fs5, rtz
; CHECK-NOV-NEXT: bgeu s1, a3, .LBB43_11
; CHECK-NOV-NEXT: .LBB43_2: # %entry
@@ -5161,7 +5227,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB43_6: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a7, fs0, rtz
; CHECK-NOV-NEXT: bgeu a6, a3, .LBB43_16
-; CHECK-NOV-NEXT: # %bb.7: # %entry
+; CHECK-NOV-NEXT: .LBB43_7: # %entry
; CHECK-NOV-NEXT: bltu a7, a3, .LBB43_9
; CHECK-NOV-NEXT: .LBB43_8: # %entry
; CHECK-NOV-NEXT: mv a7, a3
@@ -5212,22 +5278,28 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB43_10: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a3
-; CHECK-NOV-NEXT: j .LBB43_1
+; CHECK-NOV-NEXT: fcvt.lu.s a1, fs5, rtz
+; CHECK-NOV-NEXT: bltu s1, a3, .LBB43_2
; CHECK-NOV-NEXT: .LBB43_11: # %entry
; CHECK-NOV-NEXT: mv s1, a3
-; CHECK-NOV-NEXT: j .LBB43_2
+; CHECK-NOV-NEXT: fcvt.lu.s a2, fs4, rtz
+; CHECK-NOV-NEXT: bltu a1, a3, .LBB43_3
; CHECK-NOV-NEXT: .LBB43_12: # %entry
; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: j .LBB43_3
+; CHECK-NOV-NEXT: fcvt.lu.s a4, fs3, rtz
+; CHECK-NOV-NEXT: bltu a2, a3, .LBB43_4
; CHECK-NOV-NEXT: .LBB43_13: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: j .LBB43_4
+; CHECK-NOV-NEXT: fcvt.lu.s a5, fs2, rtz
+; CHECK-NOV-NEXT: bltu a4, a3, .LBB43_5
; CHECK-NOV-NEXT: .LBB43_14: # %entry
; CHECK-NOV-NEXT: mv a4, a3
-; CHECK-NOV-NEXT: j .LBB43_5
+; CHECK-NOV-NEXT: fcvt.lu.s a6, fs1, rtz
+; CHECK-NOV-NEXT: bltu a5, a3, .LBB43_6
; CHECK-NOV-NEXT: .LBB43_15: # %entry
; CHECK-NOV-NEXT: mv a5, a3
-; CHECK-NOV-NEXT: j .LBB43_6
+; CHECK-NOV-NEXT: fcvt.lu.s a7, fs0, rtz
+; CHECK-NOV-NEXT: bltu a6, a3, .LBB43_7
; CHECK-NOV-NEXT: .LBB43_16: # %entry
; CHECK-NOV-NEXT: mv a6, a3
; CHECK-NOV-NEXT: bgeu a7, a3, .LBB43_8
@@ -5476,7 +5548,7 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: lui a3, 16
; CHECK-NOV-NEXT: addiw a3, a3, -1
; CHECK-NOV-NEXT: bge a0, a3, .LBB44_10
-; CHECK-NOV-NEXT: .LBB44_1: # %entry
+; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
; CHECK-NOV-NEXT: bge s1, a3, .LBB44_11
; CHECK-NOV-NEXT: .LBB44_2: # %entry
@@ -5494,7 +5566,7 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB44_6: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a7, fs0, rtz
; CHECK-NOV-NEXT: bge a6, a3, .LBB44_16
-; CHECK-NOV-NEXT: # %bb.7: # %entry
+; CHECK-NOV-NEXT: .LBB44_7: # %entry
; CHECK-NOV-NEXT: blt a7, a3, .LBB44_9
; CHECK-NOV-NEXT: .LBB44_8: # %entry
; CHECK-NOV-NEXT: mv a7, a3
@@ -5569,22 +5641,28 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB44_10: # %entry
; CHECK-NOV-NEXT: .cfi_restore_state
; CHECK-NOV-NEXT: mv a0, a3
-; CHECK-NOV-NEXT: j .LBB44_1
+; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
+; CHECK-NOV-NEXT: blt s1, a3, .LBB44_2
; CHECK-NOV-NEXT: .LBB44_11: # %entry
; CHECK-NOV-NEXT: mv s1, a3
-; CHECK-NOV-NEXT: j .LBB44_2
+; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
+; CHECK-NOV-NEXT: blt a1, a3, .LBB44_3
; CHECK-NOV-NEXT: .LBB44_12: # %entry
; CHECK-NOV-NEXT: mv a1, a3
-; CHECK-NOV-NEXT: j .LBB44_3
+; CHECK-NOV-NEXT: fcvt.l.s a4, fs3, rtz
+; CHECK-NOV-NEXT: blt a2, a3, .LBB44_4
; CHECK-NOV-NEXT: .LBB44_13: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: j .LBB44_4
+; CHECK-NOV-NEXT: fcvt.l.s a5, fs2, rtz
+; CHECK-NOV-NEXT: blt a4, a3, .LBB44_5
; CHECK-NOV-NEXT: .LBB44_14: # %entry
; CHECK-NOV-NEXT: mv a4, a3
-; CHECK-NOV-NEXT: j .LBB44_5
+; CHECK-NOV-NEXT: fcvt.l.s a6, fs1, rtz
+; CHECK-NOV-NEXT: blt a5, a3, .LBB44_6
; CHECK-NOV-NEXT: .LBB44_15: # %entry
; CHECK-NOV-NEXT: mv a5, a3
-; CHECK-NOV-NEXT: j .LBB44_6
+; CHECK-NOV-NEXT: fcvt.l.s a7, fs0, rtz
+; CHECK-NOV-NEXT: blt a6, a3, .LBB44_7
; CHECK-NOV-NEXT: .LBB44_16: # %entry
; CHECK-NOV-NEXT: mv a6, a3
; CHECK-NOV-NEXT: bge a7, a3, .LBB44_8
@@ -5814,27 +5892,28 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
; CHECK-NOV-NEXT: # %bb.10: # %entry
; CHECK-NOV-NEXT: slti a5, a5, 0
; CHECK-NOV-NEXT: xori a5, a5, 1
-; CHECK-NOV-NEXT: j .LBB45_12
+; CHECK-NOV-NEXT: and a1, a4, a1
+; CHECK-NOV-NEXT: beqz a5, .LBB45_12
+; CHECK-NOV-NEXT: j .LBB45_13
; CHECK-NOV-NEXT: .LBB45_11:
; CHECK-NOV-NEXT: sltu a5, a3, s0
-; CHECK-NOV-NEXT: .LBB45_12:
; CHECK-NOV-NEXT: and a1, a4, a1
-; CHECK-NOV-NEXT: bnez a5, .LBB45_14
-; CHECK-NOV-NEXT: # %bb.13: # %entry
+; CHECK-NOV-NEXT: bnez a5, .LBB45_13
+; CHECK-NOV-NEXT: .LBB45_12: # %entry
; CHECK-NOV-NEXT: mv s0, a3
-; CHECK-NOV-NEXT: .LBB45_14: # %entry
-; CHECK-NOV-NEXT: beq a1, a0, .LBB45_16
-; CHECK-NOV-NEXT: # %bb.15: # %entry
+; CHECK-NOV-NEXT: .LBB45_13: # %entry
+; CHECK-NOV-NEXT: beq a1, a0, .LBB45_15
+; CHECK-NOV-NEXT: # %bb.14: # %entry
; CHECK-NOV-NEXT: slti a0, a1, 0
; CHECK-NOV-NEXT: xori a0, a0, 1
-; CHECK-NOV-NEXT: beqz a0, .LBB45_17
-; CHECK-NOV-NEXT: j .LBB45_18
-; CHECK-NOV-NEXT: .LBB45_16:
+; CHECK-NOV-NEXT: beqz a0, .LBB45_16
+; CHECK-NOV-NEXT: j .LBB45_17
+; CHECK-NOV-NEXT: .LBB45_15:
; CHECK-NOV-NEXT: sltu a0, a3, a2
-; CHECK-NOV-NEXT: bnez a0, .LBB45_18
-; CHECK-NOV-NEXT: .LBB45_17: # %entry
+; CHECK-NOV-NEXT: bnez a0, .LBB45_17
+; CHECK-NOV-NEXT: .LBB45_16: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: .LBB45_18: # %entry
+; CHECK-NOV-NEXT: .LBB45_17: # %entry
; CHECK-NOV-NEXT: mv a0, s0
; CHECK-NOV-NEXT: mv a1, a2
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -5904,27 +5983,28 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
; CHECK-V-NEXT: # %bb.10: # %entry
; CHECK-V-NEXT: slti a5, a5, 0
; CHECK-V-NEXT: xori a5, a5, 1
-; CHECK-V-NEXT: j .LBB45_12
+; CHECK-V-NEXT: and a1, a4, a1
+; CHECK-V-NEXT: beqz a5, .LBB45_12
+; CHECK-V-NEXT: j .LBB45_13
; CHECK-V-NEXT: .LBB45_11:
; CHECK-V-NEXT: sltu a5, a3, s0
-; CHECK-V-NEXT: .LBB45_12:
; CHECK-V-NEXT: and a1, a4, a1
-; CHECK-V-NEXT: bnez a5, .LBB45_14
-; CHECK-V-NEXT: # %bb.13: # %entry
+; CHECK-V-NEXT: bnez a5, .LBB45_13
+; CHECK-V-NEXT: .LBB45_12: # %entry
; CHECK-V-NEXT: mv s0, a3
-; CHECK-V-NEXT: .LBB45_14: # %entry
-; CHECK-V-NEXT: beq a1, a2, .LBB45_16
-; CHECK-V-NEXT: # %bb.15: # %entry
+; CHECK-V-NEXT: .LBB45_13: # %entry
+; CHECK-V-NEXT: beq a1, a2, .LBB45_15
+; CHECK-V-NEXT: # %bb.14: # %entry
; CHECK-V-NEXT: slti a1, a1, 0
; CHECK-V-NEXT: xori a1, a1, 1
-; CHECK-V-NEXT: beqz a1, .LBB45_17
-; CHECK-V-NEXT: j .LBB45_18
-; CHECK-V-NEXT: .LBB45_16:
+; CHECK-V-NEXT: beqz a1, .LBB45_16
+; CHECK-V-NEXT: j .LBB45_17
+; CHECK-V-NEXT: .LBB45_15:
; CHECK-V-NEXT: sltu a1, a3, a0
-; CHECK-V-NEXT: bnez a1, .LBB45_18
-; CHECK-V-NEXT: .LBB45_17: # %entry
+; CHECK-V-NEXT: bnez a1, .LBB45_17
+; CHECK-V-NEXT: .LBB45_16: # %entry
; CHECK-V-NEXT: mv a0, a3
-; CHECK-V-NEXT: .LBB45_18: # %entry
+; CHECK-V-NEXT: .LBB45_17: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vmv.s.x v9, s0
@@ -6216,27 +6296,28 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-NOV-NEXT: # %bb.10: # %entry
; CHECK-NOV-NEXT: slti a5, a5, 0
; CHECK-NOV-NEXT: xori a5, a5, 1
-; CHECK-NOV-NEXT: j .LBB48_12
+; CHECK-NOV-NEXT: and a1, a4, a1
+; CHECK-NOV-NEXT: beqz a5, .LBB48_12
+; CHECK-NOV-NEXT: j .LBB48_13
; CHECK-NOV-NEXT: .LBB48_11:
; CHECK-NOV-NEXT: sltu a5, a3, s0
-; CHECK-NOV-NEXT: .LBB48_12:
; CHECK-NOV-NEXT: and a1, a4, a1
-; CHECK-NOV-NEXT: bnez a5, .LBB48_14
-; CHECK-NOV-NEXT: # %bb.13: # %entry
+; CHECK-NOV-NEXT: bnez a5, .LBB48_13
+; CHECK-NOV-NEXT: .LBB48_12: # %entry
; CHECK-NOV-NEXT: mv s0, a3
-; CHECK-NOV-NEXT: .LBB48_14: # %entry
-; CHECK-NOV-NEXT: beq a1, a0, .LBB48_16
-; CHECK-NOV-NEXT: # %bb.15: # %entry
+; CHECK-NOV-NEXT: .LBB48_13: # %entry
+; CHECK-NOV-NEXT: beq a1, a0, .LBB48_15
+; CHECK-NOV-NEXT: # %bb.14: # %entry
; CHECK-NOV-NEXT: slti a0, a1, 0
; CHECK-NOV-NEXT: xori a0, a0, 1
-; CHECK-NOV-NEXT: beqz a0, .LBB48_17
-; CHECK-NOV-NEXT: j .LBB48_18
-; CHECK-NOV-NEXT: .LBB48_16:
+; CHECK-NOV-NEXT: beqz a0, .LBB48_16
+; CHECK-NOV-NEXT: j .LBB48_17
+; CHECK-NOV-NEXT: .LBB48_15:
; CHECK-NOV-NEXT: sltu a0, a3, a2
-; CHECK-NOV-NEXT: bnez a0, .LBB48_18
-; CHECK-NOV-NEXT: .LBB48_17: # %entry
+; CHECK-NOV-NEXT: bnez a0, .LBB48_17
+; CHECK-NOV-NEXT: .LBB48_16: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: .LBB48_18: # %entry
+; CHECK-NOV-NEXT: .LBB48_17: # %entry
; CHECK-NOV-NEXT: mv a0, s0
; CHECK-NOV-NEXT: mv a1, a2
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -6306,27 +6387,28 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-V-NEXT: # %bb.10: # %entry
; CHECK-V-NEXT: slti a5, a5, 0
; CHECK-V-NEXT: xori a5, a5, 1
-; CHECK-V-NEXT: j .LBB48_12
+; CHECK-V-NEXT: and a1, a4, a1
+; CHECK-V-NEXT: beqz a5, .LBB48_12
+; CHECK-V-NEXT: j .LBB48_13
; CHECK-V-NEXT: .LBB48_11:
; CHECK-V-NEXT: sltu a5, a3, s0
-; CHECK-V-NEXT: .LBB48_12:
; CHECK-V-NEXT: and a1, a4, a1
-; CHECK-V-NEXT: bnez a5, .LBB48_14
-; CHECK-V-NEXT: # %bb.13: # %entry
+; CHECK-V-NEXT: bnez a5, .LBB48_13
+; CHECK-V-NEXT: .LBB48_12: # %entry
; CHECK-V-NEXT: mv s0, a3
-; CHECK-V-NEXT: .LBB48_14: # %entry
-; CHECK-V-NEXT: beq a1, a2, .LBB48_16
-; CHECK-V-NEXT: # %bb.15: # %entry
+; CHECK-V-NEXT: .LBB48_13: # %entry
+; CHECK-V-NEXT: beq a1, a2, .LBB48_15
+; CHECK-V-NEXT: # %bb.14: # %entry
; CHECK-V-NEXT: slti a1, a1, 0
; CHECK-V-NEXT: xori a1, a1, 1
-; CHECK-V-NEXT: beqz a1, .LBB48_17
-; CHECK-V-NEXT: j .LBB48_18
-; CHECK-V-NEXT: .LBB48_16:
+; CHECK-V-NEXT: beqz a1, .LBB48_16
+; CHECK-V-NEXT: j .LBB48_17
+; CHECK-V-NEXT: .LBB48_15:
; CHECK-V-NEXT: sltu a1, a3, a0
-; CHECK-V-NEXT: bnez a1, .LBB48_18
-; CHECK-V-NEXT: .LBB48_17: # %entry
+; CHECK-V-NEXT: bnez a1, .LBB48_17
+; CHECK-V-NEXT: .LBB48_16: # %entry
; CHECK-V-NEXT: mv a0, a3
-; CHECK-V-NEXT: .LBB48_18: # %entry
+; CHECK-V-NEXT: .LBB48_17: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vmv.s.x v9, s0
@@ -6621,27 +6703,28 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-NOV-NEXT: # %bb.10: # %entry
; CHECK-NOV-NEXT: slti a5, a5, 0
; CHECK-NOV-NEXT: xori a5, a5, 1
-; CHECK-NOV-NEXT: j .LBB51_12
+; CHECK-NOV-NEXT: and a1, a4, a1
+; CHECK-NOV-NEXT: beqz a5, .LBB51_12
+; CHECK-NOV-NEXT: j .LBB51_13
; CHECK-NOV-NEXT: .LBB51_11:
; CHECK-NOV-NEXT: sltu a5, a3, s0
-; CHECK-NOV-NEXT: .LBB51_12:
; CHECK-NOV-NEXT: and a1, a4, a1
-; CHECK-NOV-NEXT: bnez a5, .LBB51_14
-; CHECK-NOV-NEXT: # %bb.13: # %entry
+; CHECK-NOV-NEXT: bnez a5, .LBB51_13
+; CHECK-NOV-NEXT: .LBB51_12: # %entry
; CHECK-NOV-NEXT: mv s0, a3
-; CHECK-NOV-NEXT: .LBB51_14: # %entry
-; CHECK-NOV-NEXT: beq a1, a0, .LBB51_16
-; CHECK-NOV-NEXT: # %bb.15: # %entry
+; CHECK-NOV-NEXT: .LBB51_13: # %entry
+; CHECK-NOV-NEXT: beq a1, a0, .LBB51_15
+; CHECK-NOV-NEXT: # %bb.14: # %entry
; CHECK-NOV-NEXT: slti a0, a1, 0
; CHECK-NOV-NEXT: xori a0, a0, 1
-; CHECK-NOV-NEXT: beqz a0, .LBB51_17
-; CHECK-NOV-NEXT: j .LBB51_18
-; CHECK-NOV-NEXT: .LBB51_16:
+; CHECK-NOV-NEXT: beqz a0, .LBB51_16
+; CHECK-NOV-NEXT: j .LBB51_17
+; CHECK-NOV-NEXT: .LBB51_15:
; CHECK-NOV-NEXT: sltu a0, a3, a2
-; CHECK-NOV-NEXT: bnez a0, .LBB51_18
-; CHECK-NOV-NEXT: .LBB51_17: # %entry
+; CHECK-NOV-NEXT: bnez a0, .LBB51_17
+; CHECK-NOV-NEXT: .LBB51_16: # %entry
; CHECK-NOV-NEXT: mv a2, a3
-; CHECK-NOV-NEXT: .LBB51_18: # %entry
+; CHECK-NOV-NEXT: .LBB51_17: # %entry
; CHECK-NOV-NEXT: mv a0, s0
; CHECK-NOV-NEXT: mv a1, a2
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -6709,27 +6792,28 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-V-NEXT: # %bb.10: # %entry
; CHECK-V-NEXT: slti a5, a5, 0
; CHECK-V-NEXT: xori a5, a5, 1
-; CHECK-V-NEXT: j .LBB51_12
+; CHECK-V-NEXT: and a1, a4, a1
+; CHECK-V-NEXT: beqz a5, .LBB51_12
+; CHECK-V-NEXT: j .LBB51_13
; CHECK-V-NEXT: .LBB51_11:
; CHECK-V-NEXT: sltu a5, a3, s0
-; CHECK-V-NEXT: .LBB51_12:
; CHECK-V-NEXT: and a1, a4, a1
-; CHECK-V-NEXT: bnez a5, .LBB51_14
-; CHECK-V-NEXT: # %bb.13: # %entry
+; CHECK-V-NEXT: bnez a5, .LBB51_13
+; CHECK-V-NEXT: .LBB51_12: # %entry
; CHECK-V-NEXT: mv s0, a3
-; CHECK-V-NEXT: .LBB51_14: # %entry
-; CHECK-V-NEXT: beq a1, a2, .LBB51_16
-; CHECK-V-NEXT: # %bb.15: # %entry
+; CHECK-V-NEXT: .LBB51_13: # %entry
+; CHECK-V-NEXT: beq a1, a2, .LBB51_15
+; CHECK-V-NEXT: # %bb.14: # %entry
; CHECK-V-NEXT: slti a1, a1, 0
; CHECK-V-NEXT: xori a1, a1, 1
-; CHECK-V-NEXT: beqz a1, .LBB51_17
-; CHECK-V-NEXT: j .LBB51_18
-; CHECK-V-NEXT: .LBB51_16:
+; CHECK-V-NEXT: beqz a1, .LBB51_16
+; CHECK-V-NEXT: j .LBB51_17
+; CHECK-V-NEXT: .LBB51_15:
; CHECK-V-NEXT: sltu a1, a3, a0
-; CHECK-V-NEXT: bnez a1, .LBB51_18
-; CHECK-V-NEXT: .LBB51_17: # %entry
+; CHECK-V-NEXT: bnez a1, .LBB51_17
+; CHECK-V-NEXT: .LBB51_16: # %entry
; CHECK-V-NEXT: mv a0, a3
-; CHECK-V-NEXT: .LBB51_18: # %entry
+; CHECK-V-NEXT: .LBB51_17: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vmv.s.x v9, a0
; CHECK-V-NEXT: vmv.s.x v8, s0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index 6da5c594bf600..39f282a5a3236 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -179,19 +179,19 @@ define <vscale x 1 x double> @test5(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a2, a1, 1
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: bnez a2, .LBB4_4
+; CHECK-NEXT: bnez a2, .LBB4_3
; CHECK-NEXT: # %bb.1: # %if.else
; CHECK-NEXT: vfsub.vv v9, v8, v9
-; CHECK-NEXT: .LBB4_2: # %if.else
; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: beqz a1, .LBB4_5
-; CHECK-NEXT: # %bb.3: # %if.then4
+; CHECK-NEXT: beqz a1, .LBB4_4
+; CHECK-NEXT: .LBB4_2: # %if.then4
; CHECK-NEXT: vfmul.vv v8, v9, v8
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB4_4: # %if.then
+; CHECK-NEXT: .LBB4_3: # %if.then
; CHECK-NEXT: vfadd.vv v9, v8, v9
-; CHECK-NEXT: j .LBB4_2
-; CHECK-NEXT: .LBB4_5: # %if.else5
+; CHECK-NEXT: andi a1, a1, 2
+; CHECK-NEXT: bnez a1, .LBB4_2
+; CHECK-NEXT: .LBB4_4: # %if.else5
; CHECK-NEXT: vfmul.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
@@ -236,13 +236,12 @@ define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a2, a1, 1
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: bnez a2, .LBB5_4
+; CHECK-NEXT: bnez a2, .LBB5_3
; CHECK-NEXT: # %bb.1: # %if.else
; CHECK-NEXT: vfsub.vv v8, v8, v9
-; CHECK-NEXT: .LBB5_2: # %if.else
; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: beqz a1, .LBB5_5
-; CHECK-NEXT: # %bb.3: # %if.then4
+; CHECK-NEXT: beqz a1, .LBB5_4
+; CHECK-NEXT: .LBB5_2: # %if.then4
; CHECK-NEXT: lui a1, %hi(.LCPI5_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI5_0)(a1)
; CHECK-NEXT: lui a1, %hi(.LCPI5_1)
@@ -253,11 +252,12 @@ define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
; CHECK-NEXT: lui a1, %hi(scratch)
; CHECK-NEXT: addi a1, a1, %lo(scratch)
; CHECK-NEXT: vse64.v v9, (a1)
-; CHECK-NEXT: j .LBB5_6
-; CHECK-NEXT: .LBB5_4: # %if.then
+; CHECK-NEXT: j .LBB5_5
+; CHECK-NEXT: .LBB5_3: # %if.then
; CHECK-NEXT: vfadd.vv v8, v8, v9
-; CHECK-NEXT: j .LBB5_2
-; CHECK-NEXT: .LBB5_5: # %if.else5
+; CHECK-NEXT: andi a1, a1, 2
+; CHECK-NEXT: bnez a1, .LBB5_2
+; CHECK-NEXT: .LBB5_4: # %if.else5
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: lui a1, 260096
; CHECK-NEXT: vmv.v.x v9, a1
@@ -267,7 +267,7 @@ define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
; CHECK-NEXT: lui a1, %hi(scratch)
; CHECK-NEXT: addi a1, a1, %lo(scratch)
; CHECK-NEXT: vse32.v v9, (a1)
-; CHECK-NEXT: .LBB5_6: # %if.end10
+; CHECK-NEXT: .LBB5_5: # %if.end10
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfmul.vv v8, v8, v8
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/sadd_sat.ll b/llvm/test/CodeGen/RISCV/sadd_sat.ll
index d759db93ae481..ab03ccc4ba590 100644
--- a/llvm/test/CodeGen/RISCV/sadd_sat.ll
+++ b/llvm/test/CodeGen/RISCV/sadd_sat.ll
@@ -116,14 +116,15 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
; RV32I-NEXT: lui a1, 8
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: bge a0, a1, .LBB2_3
-; RV32I-NEXT: .LBB2_1:
+; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: lui a1, 1048568
; RV32I-NEXT: bge a1, a0, .LBB2_4
-; RV32I-NEXT: # %bb.2:
+; RV32I-NEXT: .LBB2_2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB2_3:
; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: j .LBB2_1
+; RV32I-NEXT: lui a1, 1048568
+; RV32I-NEXT: blt a1, a0, .LBB2_2
; RV32I-NEXT: .LBB2_4:
; RV32I-NEXT: lui a0, 1048568
; RV32I-NEXT: ret
@@ -134,14 +135,15 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
; RV64I-NEXT: lui a1, 8
; RV64I-NEXT: addiw a1, a1, -1
; RV64I-NEXT: bge a0, a1, .LBB2_3
-; RV64I-NEXT: .LBB2_1:
+; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: lui a1, 1048568
; RV64I-NEXT: bge a1, a0, .LBB2_4
-; RV64I-NEXT: # %bb.2:
+; RV64I-NEXT: .LBB2_2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB2_3:
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: j .LBB2_1
+; RV64I-NEXT: lui a1, 1048568
+; RV64I-NEXT: blt a1, a0, .LBB2_2
; RV64I-NEXT: .LBB2_4:
; RV64I-NEXT: lui a0, 1048568
; RV64I-NEXT: ret
@@ -175,14 +177,15 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: li a1, 127
; RV32I-NEXT: bge a0, a1, .LBB3_3
-; RV32I-NEXT: .LBB3_1:
+; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: li a1, -128
; RV32I-NEXT: bge a1, a0, .LBB3_4
-; RV32I-NEXT: # %bb.2:
+; RV32I-NEXT: .LBB3_2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB3_3:
; RV32I-NEXT: li a0, 127
-; RV32I-NEXT: j .LBB3_1
+; RV32I-NEXT: li a1, -128
+; RV32I-NEXT: blt a1, a0, .LBB3_2
; RV32I-NEXT: .LBB3_4:
; RV32I-NEXT: li a0, -128
; RV32I-NEXT: ret
@@ -192,14 +195,15 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: li a1, 127
; RV64I-NEXT: bge a0, a1, .LBB3_3
-; RV64I-NEXT: .LBB3_1:
+; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: li a1, -128
; RV64I-NEXT: bge a1, a0, .LBB3_4
-; RV64I-NEXT: # %bb.2:
+; RV64I-NEXT: .LBB3_2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB3_3:
; RV64I-NEXT: li a0, 127
-; RV64I-NEXT: j .LBB3_1
+; RV64I-NEXT: li a1, -128
+; RV64I-NEXT: blt a1, a0, .LBB3_2
; RV64I-NEXT: .LBB3_4:
; RV64I-NEXT: li a0, -128
; RV64I-NEXT: ret
@@ -231,14 +235,15 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: li a1, 7
; RV32I-NEXT: bge a0, a1, .LBB4_3
-; RV32I-NEXT: .LBB4_1:
+; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: li a1, -8
; RV32I-NEXT: bge a1, a0, .LBB4_4
-; RV32I-NEXT: # %bb.2:
+; RV32I-NEXT: .LBB4_2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB4_3:
; RV32I-NEXT: li a0, 7
-; RV32I-NEXT: j .LBB4_1
+; RV32I-NEXT: li a1, -8
+; RV32I-NEXT: blt a1, a0, .LBB4_2
; RV32I-NEXT: .LBB4_4:
; RV32I-NEXT: li a0, -8
; RV32I-NEXT: ret
@@ -248,14 +253,15 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: li a1, 7
; RV64I-NEXT: bge a0, a1, .LBB4_3
-; RV64I-NEXT: .LBB4_1:
+; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: li a1, -8
; RV64I-NEXT: bge a1, a0, .LBB4_4
-; RV64I-NEXT: # %bb.2:
+; RV64I-NEXT: .LBB4_2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB4_3:
; RV64I-NEXT: li a0, 7
-; RV64I-NEXT: j .LBB4_1
+; RV64I-NEXT: li a1, -8
+; RV64I-NEXT: blt a1, a0, .LBB4_2
; RV64I-NEXT: .LBB4_4:
; RV64I-NEXT: li a0, -8
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll b/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll
index d0ef17657da5a..abcf3379d0a6e 100644
--- a/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll
+++ b/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll
@@ -126,14 +126,15 @@ define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind {
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: addi a1, a2, -1
; RV32I-NEXT: bge a0, a1, .LBB2_3
-; RV32I-NEXT: .LBB2_1:
+; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: lui a1, 1048568
; RV32I-NEXT: bge a1, a0, .LBB2_4
-; RV32I-NEXT: # %bb.2:
+; RV32I-NEXT: .LBB2_2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB2_3:
; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: j .LBB2_1
+; RV32I-NEXT: lui a1, 1048568
+; RV32I-NEXT: blt a1, a0, .LBB2_2
; RV32I-NEXT: .LBB2_4:
; RV32I-NEXT: lui a0, 1048568
; RV32I-NEXT: ret
@@ -149,14 +150,15 @@ define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind {
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: addiw a1, a2, -1
; RV64I-NEXT: bge a0, a1, .LBB2_3
-; RV64I-NEXT: .LBB2_1:
+; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: lui a1, 1048568
; RV64I-NEXT: bge a1, a0, .LBB2_4
-; RV64I-NEXT: # %bb.2:
+; RV64I-NEXT: .LBB2_2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB2_3:
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: j .LBB2_1
+; RV64I-NEXT: lui a1, 1048568
+; RV64I-NEXT: blt a1, a0, .LBB2_2
; RV64I-NEXT: .LBB2_4:
; RV64I-NEXT: lui a0, 1048568
; RV64I-NEXT: ret
@@ -202,14 +204,15 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: li a1, 127
; RV32I-NEXT: bge a0, a1, .LBB3_3
-; RV32I-NEXT: .LBB3_1:
+; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: li a1, -128
; RV32I-NEXT: bge a1, a0, .LBB3_4
-; RV32I-NEXT: # %bb.2:
+; RV32I-NEXT: .LBB3_2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB3_3:
; RV32I-NEXT: li a0, 127
-; RV32I-NEXT: j .LBB3_1
+; RV32I-NEXT: li a1, -128
+; RV32I-NEXT: blt a1, a0, .LBB3_2
; RV32I-NEXT: .LBB3_4:
; RV32I-NEXT: li a0, -128
; RV32I-NEXT: ret
@@ -224,14 +227,15 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: li a1, 127
; RV64I-NEXT: bge a0, a1, .LBB3_3
-; RV64I-NEXT: .LBB3_1:
+; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: li a1, -128
; RV64I-NEXT: bge a1, a0, .LBB3_4
-; RV64I-NEXT: # %bb.2:
+; RV64I-NEXT: .LBB3_2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB3_3:
; RV64I-NEXT: li a0, 127
-; RV64I-NEXT: j .LBB3_1
+; RV64I-NEXT: li a1, -128
+; RV64I-NEXT: blt a1, a0, .LBB3_2
; RV64I-NEXT: .LBB3_4:
; RV64I-NEXT: li a0, -128
; RV64I-NEXT: ret
@@ -275,14 +279,15 @@ define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: li a1, 7
; RV32I-NEXT: bge a0, a1, .LBB4_3
-; RV32I-NEXT: .LBB4_1:
+; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: li a1, -8
; RV32I-NEXT: bge a1, a0, .LBB4_4
-; RV32I-NEXT: # %bb.2:
+; RV32I-NEXT: .LBB4_2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB4_3:
; RV32I-NEXT: li a0, 7
-; RV32I-NEXT: j .LBB4_1
+; RV32I-NEXT: li a1, -8
+; RV32I-NEXT: blt a1, a0, .LBB4_2
; RV32I-NEXT: .LBB4_4:
; RV32I-NEXT: li a0, -8
; RV32I-NEXT: ret
@@ -297,14 +302,15 @@ define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: li a1, 7
; RV64I-NEXT: bge a0, a1, .LBB4_3
-; RV64I-NEXT: .LBB4_1:
+; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: li a1, -8
; RV64I-NEXT: bge a1, a0, .LBB4_4
-; RV64I-NEXT: # %bb.2:
+; RV64I-NEXT: .LBB4_2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB4_3:
; RV64I-NEXT: li a0, 7
-; RV64I-NEXT: j .LBB4_1
+; RV64I-NEXT: li a1, -8
+; RV64I-NEXT: blt a1, a0, .LBB4_2
; RV64I-NEXT: .LBB4_4:
; RV64I-NEXT: li a0, -8
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
index 24aebefbda2b8..249dabba0cc28 100644
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -414,7 +414,7 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind {
; RV32I-NEXT: bnez t2, .LBB10_14
; RV32I-NEXT: .LBB10_4:
; RV32I-NEXT: beqz a6, .LBB10_6
-; RV32I-NEXT: # %bb.5:
+; RV32I-NEXT: .LBB10_5:
; RV32I-NEXT: mv t0, a4
; RV32I-NEXT: .LBB10_6:
; RV32I-NEXT: slli t3, t0, 1
@@ -456,7 +456,8 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind {
; RV32I-NEXT: beqz t2, .LBB10_4
; RV32I-NEXT: .LBB10_14:
; RV32I-NEXT: mv a4, t1
-; RV32I-NEXT: j .LBB10_4
+; RV32I-NEXT: bnez a6, .LBB10_5
+; RV32I-NEXT: j .LBB10_6
;
; RV64I-LABEL: fshr128_minsize:
; RV64I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/simplify-condbr.ll b/llvm/test/CodeGen/RISCV/simplify-condbr.ll
index bd9a9af530a43..e5a3cc0d8e12f 100644
--- a/llvm/test/CodeGen/RISCV/simplify-condbr.ll
+++ b/llvm/test/CodeGen/RISCV/simplify-condbr.ll
@@ -34,8 +34,8 @@ define fastcc i32 @S_regrepeat(ptr %startposp, i32 %max, i8 %0, i1 %cmp343) noun
; CHECK-NEXT: # %bb.3: # %land.rhs251
; CHECK-NEXT: lw zero, 0(zero)
; CHECK-NEXT: li s0, 1
-; CHECK-NEXT: beqz s0, .LBB0_8
-; CHECK-NEXT: j .LBB0_9
+; CHECK-NEXT: bnez s0, .LBB0_9
+; CHECK-NEXT: j .LBB0_8
; CHECK-NEXT: .LBB0_4: # %sw.bb336
; CHECK-NEXT: mv s1, a0
; CHECK-NEXT: li s0, 0
@@ -111,29 +111,30 @@ define ptr @Perl_pp_refassign(ptr %PL_stack_sp, i1 %tobool.not, i1 %tobool3.not,
; CHECK-LABEL: Perl_pp_refassign:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: beqz a1, .LBB1_6
+; CHECK-NEXT: beqz a1, .LBB1_4
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: andi a2, a2, 1
; CHECK-NEXT: bnez a2, .LBB1_5
-; CHECK-NEXT: # %bb.3: # %cond.true4
+; CHECK-NEXT: .LBB1_2: # %cond.true4
; CHECK-NEXT: ld a0, 0(a0)
; CHECK-NEXT: snez a0, a0
; CHECK-NEXT: beqz a0, .LBB1_5
-; CHECK-NEXT: # %bb.4: # %sw.bb85
+; CHECK-NEXT: # %bb.3: # %sw.bb85
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: ld a0, 0(a1)
; CHECK-NEXT: call Perl_av_store
; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: j .LBB1_5
+; CHECK-NEXT: .LBB1_4: # %cond.true
+; CHECK-NEXT: ld a1, 0(a0)
+; CHECK-NEXT: andi a2, a2, 1
+; CHECK-NEXT: beqz a2, .LBB1_2
; CHECK-NEXT: .LBB1_5: # %common.ret
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB1_6: # %cond.true
-; CHECK-NEXT: ld a1, 0(a0)
-; CHECK-NEXT: j .LBB1_2
entry:
br i1 %tobool.not, label %cond.end, label %cond.true
diff --git a/llvm/test/CodeGen/RISCV/ssub_sat.ll b/llvm/test/CodeGen/RISCV/ssub_sat.ll
index 31afdd82bd41e..cc5cd76e913c6 100644
--- a/llvm/test/CodeGen/RISCV/ssub_sat.ll
+++ b/llvm/test/CodeGen/RISCV/ssub_sat.ll
@@ -96,14 +96,15 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
; RV32I-NEXT: lui a1, 8
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: bge a0, a1, .LBB2_3
-; RV32I-NEXT: .LBB2_1:
+; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: lui a1, 1048568
; RV32I-NEXT: bge a1, a0, .LBB2_4
-; RV32I-NEXT: # %bb.2:
+; RV32I-NEXT: .LBB2_2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB2_3:
; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: j .LBB2_1
+; RV32I-NEXT: lui a1, 1048568
+; RV32I-NEXT: blt a1, a0, .LBB2_2
; RV32I-NEXT: .LBB2_4:
; RV32I-NEXT: lui a0, 1048568
; RV32I-NEXT: ret
@@ -114,14 +115,15 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
; RV64I-NEXT: lui a1, 8
; RV64I-NEXT: addiw a1, a1, -1
; RV64I-NEXT: bge a0, a1, .LBB2_3
-; RV64I-NEXT: .LBB2_1:
+; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: lui a1, 1048568
; RV64I-NEXT: bge a1, a0, .LBB2_4
-; RV64I-NEXT: # %bb.2:
+; RV64I-NEXT: .LBB2_2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB2_3:
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: j .LBB2_1
+; RV64I-NEXT: lui a1, 1048568
+; RV64I-NEXT: blt a1, a0, .LBB2_2
; RV64I-NEXT: .LBB2_4:
; RV64I-NEXT: lui a0, 1048568
; RV64I-NEXT: ret
@@ -155,14 +157,15 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: li a1, 127
; RV32I-NEXT: bge a0, a1, .LBB3_3
-; RV32I-NEXT: .LBB3_1:
+; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: li a1, -128
; RV32I-NEXT: bge a1, a0, .LBB3_4
-; RV32I-NEXT: # %bb.2:
+; RV32I-NEXT: .LBB3_2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB3_3:
; RV32I-NEXT: li a0, 127
-; RV32I-NEXT: j .LBB3_1
+; RV32I-NEXT: li a1, -128
+; RV32I-NEXT: blt a1, a0, .LBB3_2
; RV32I-NEXT: .LBB3_4:
; RV32I-NEXT: li a0, -128
; RV32I-NEXT: ret
@@ -172,14 +175,15 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: li a1, 127
; RV64I-NEXT: bge a0, a1, .LBB3_3
-; RV64I-NEXT: .LBB3_1:
+; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: li a1, -128
; RV64I-NEXT: bge a1, a0, .LBB3_4
-; RV64I-NEXT: # %bb.2:
+; RV64I-NEXT: .LBB3_2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB3_3:
; RV64I-NEXT: li a0, 127
-; RV64I-NEXT: j .LBB3_1
+; RV64I-NEXT: li a1, -128
+; RV64I-NEXT: blt a1, a0, .LBB3_2
; RV64I-NEXT: .LBB3_4:
; RV64I-NEXT: li a0, -128
; RV64I-NEXT: ret
@@ -211,14 +215,15 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: li a1, 7
; RV32I-NEXT: bge a0, a1, .LBB4_3
-; RV32I-NEXT: .LBB4_1:
+; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: li a1, -8
; RV32I-NEXT: bge a1, a0, .LBB4_4
-; RV32I-NEXT: # %bb.2:
+; RV32I-NEXT: .LBB4_2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB4_3:
; RV32I-NEXT: li a0, 7
-; RV32I-NEXT: j .LBB4_1
+; RV32I-NEXT: li a1, -8
+; RV32I-NEXT: blt a1, a0, .LBB4_2
; RV32I-NEXT: .LBB4_4:
; RV32I-NEXT: li a0, -8
; RV32I-NEXT: ret
@@ -228,14 +233,15 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: li a1, 7
; RV64I-NEXT: bge a0, a1, .LBB4_3
-; RV64I-NEXT: .LBB4_1:
+; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: li a1, -8
; RV64I-NEXT: bge a1, a0, .LBB4_4
-; RV64I-NEXT: # %bb.2:
+; RV64I-NEXT: .LBB4_2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB4_3:
; RV64I-NEXT: li a0, 7
-; RV64I-NEXT: j .LBB4_1
+; RV64I-NEXT: li a1, -8
+; RV64I-NEXT: blt a1, a0, .LBB4_2
; RV64I-NEXT: .LBB4_4:
; RV64I-NEXT: li a0, -8
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll
index 8c341922b887c..0499992b71778 100644
--- a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll
+++ b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll
@@ -106,14 +106,15 @@ define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind {
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: addi a1, a2, -1
; RV32I-NEXT: bge a0, a1, .LBB2_3
-; RV32I-NEXT: .LBB2_1:
+; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: lui a1, 1048568
; RV32I-NEXT: bge a1, a0, .LBB2_4
-; RV32I-NEXT: # %bb.2:
+; RV32I-NEXT: .LBB2_2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB2_3:
; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: j .LBB2_1
+; RV32I-NEXT: lui a1, 1048568
+; RV32I-NEXT: blt a1, a0, .LBB2_2
; RV32I-NEXT: .LBB2_4:
; RV32I-NEXT: lui a0, 1048568
; RV32I-NEXT: ret
@@ -129,14 +130,15 @@ define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind {
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: addiw a1, a2, -1
; RV64I-NEXT: bge a0, a1, .LBB2_3
-; RV64I-NEXT: .LBB2_1:
+; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: lui a1, 1048568
; RV64I-NEXT: bge a1, a0, .LBB2_4
-; RV64I-NEXT: # %bb.2:
+; RV64I-NEXT: .LBB2_2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB2_3:
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: j .LBB2_1
+; RV64I-NEXT: lui a1, 1048568
+; RV64I-NEXT: blt a1, a0, .LBB2_2
; RV64I-NEXT: .LBB2_4:
; RV64I-NEXT: lui a0, 1048568
; RV64I-NEXT: ret
@@ -182,14 +184,15 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: li a1, 127
; RV32I-NEXT: bge a0, a1, .LBB3_3
-; RV32I-NEXT: .LBB3_1:
+; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: li a1, -128
; RV32I-NEXT: bge a1, a0, .LBB3_4
-; RV32I-NEXT: # %bb.2:
+; RV32I-NEXT: .LBB3_2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB3_3:
; RV32I-NEXT: li a0, 127
-; RV32I-NEXT: j .LBB3_1
+; RV32I-NEXT: li a1, -128
+; RV32I-NEXT: blt a1, a0, .LBB3_2
; RV32I-NEXT: .LBB3_4:
; RV32I-NEXT: li a0, -128
; RV32I-NEXT: ret
@@ -204,14 +207,15 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: li a1, 127
; RV64I-NEXT: bge a0, a1, .LBB3_3
-; RV64I-NEXT: .LBB3_1:
+; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: li a1, -128
; RV64I-NEXT: bge a1, a0, .LBB3_4
-; RV64I-NEXT: # %bb.2:
+; RV64I-NEXT: .LBB3_2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB3_3:
; RV64I-NEXT: li a0, 127
-; RV64I-NEXT: j .LBB3_1
+; RV64I-NEXT: li a1, -128
+; RV64I-NEXT: blt a1, a0, .LBB3_2
; RV64I-NEXT: .LBB3_4:
; RV64I-NEXT: li a0, -128
; RV64I-NEXT: ret
@@ -255,14 +259,15 @@ define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: li a1, 7
; RV32I-NEXT: bge a0, a1, .LBB4_3
-; RV32I-NEXT: .LBB4_1:
+; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: li a1, -8
; RV32I-NEXT: bge a1, a0, .LBB4_4
-; RV32I-NEXT: # %bb.2:
+; RV32I-NEXT: .LBB4_2:
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB4_3:
; RV32I-NEXT: li a0, 7
-; RV32I-NEXT: j .LBB4_1
+; RV32I-NEXT: li a1, -8
+; RV32I-NEXT: blt a1, a0, .LBB4_2
; RV32I-NEXT: .LBB4_4:
; RV32I-NEXT: li a0, -8
; RV32I-NEXT: ret
@@ -277,14 +282,15 @@ define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: li a1, 7
; RV64I-NEXT: bge a0, a1, .LBB4_3
-; RV64I-NEXT: .LBB4_1:
+; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: li a1, -8
; RV64I-NEXT: bge a1, a0, .LBB4_4
-; RV64I-NEXT: # %bb.2:
+; RV64I-NEXT: .LBB4_2:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB4_3:
; RV64I-NEXT: li a0, 7
-; RV64I-NEXT: j .LBB4_1
+; RV64I-NEXT: li a1, -8
+; RV64I-NEXT: blt a1, a0, .LBB4_2
; RV64I-NEXT: .LBB4_4:
; RV64I-NEXT: li a0, -8
; RV64I-NEXT: ret
From aba8697085ef7a281471f2e27a844eff8dcb9afb Mon Sep 17 00:00:00 2001
From: "Mikhail R. Gadelha" <mikhail at igalia.com>
Date: Thu, 10 Apr 2025 14:51:38 -0300
Subject: [PATCH 6/6] Whitespace
Signed-off-by: Mikhail R. Gadelha <mikhail at igalia.com>
---
llvm/test/CodeGen/RISCV/O0-pipeline.ll | 8 ++++----
llvm/test/CodeGen/RISCV/O3-pipeline.ll | 8 ++++----
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index 2a15b64c8651a..32df712503296 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -62,10 +62,10 @@
; CHECK-NEXT: Insert fentry calls
; CHECK-NEXT: Insert XRay ops
; CHECK-NEXT: Implement the 'patchable-function' attribute
-; CHECK-NEXT: MachineDominator Tree Construction
-; CHECK-NEXT: Machine Natural Loop Construction
-; CHECK-NEXT: Machine Block Frequency Analysis
-; CHECK-NEXT: Control Flow Optimizer
+; CHECK-NEXT: MachineDominator Tree Construction
+; CHECK-NEXT: Machine Natural Loop Construction
+; CHECK-NEXT: Machine Block Frequency Analysis
+; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: RISC-V Make Compressible
; CHECK-NEXT: Contiguously Lay Out Funclets
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 80fd4d8ba57fc..d3a0b89fc2c36 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -195,10 +195,10 @@
; CHECK-NEXT: Implement the 'patchable-function' attribute
; CHECK-NEXT: Machine Copy Propagation Pass
; CHECK-NEXT: RISC-V Late Branch Optimisation Pass
-; CHECK-NEXT: MachineDominator Tree Construction
-; CHECK-NEXT: Machine Natural Loop Construction
-; CHECK-NEXT: Machine Block Frequency Analysis
-; CHECK-NEXT: Control Flow Optimizer
+; CHECK-NEXT: MachineDominator Tree Construction
+; CHECK-NEXT: Machine Natural Loop Construction
+; CHECK-NEXT: Machine Block Frequency Analysis
+; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: RISC-V Make Compressible
; CHECK-NEXT: Contiguously Lay Out Funclets