[llvm] 22d26ae - [RISCV] Optimize (slli (srli (slli X, C1), C1), C2) -> (srli (slli X, C1), C1-C2) (#119567)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 11 22:58:40 PST 2024
Author: Piotr Fusik
Date: 2024-12-12T07:58:36+01:00
New Revision: 22d26ae3040095c7bfe4e2f1678b9738bf81fd4a
URL: https://github.com/llvm/llvm-project/commit/22d26ae3040095c7bfe4e2f1678b9738bf81fd4a
DIFF: https://github.com/llvm/llvm-project/commit/22d26ae3040095c7bfe4e2f1678b9738bf81fd4a.diff
LOG: [RISCV] Optimize (slli (srli (slli X, C1), C1), C2) -> (srli (slli X, C1), C1-C2) (#119567)
Masking out the most significant bits can be done with a shl followed by a srl with the same shift amount. If that is followed by another shl, we can instead srl by a smaller amount.
This transform is already implemented in tablegen for masking out the 32 most significant bits.
This emits better code for, e.g.:
float *index(float *p, int i)
{
    return p + (i & (1 << 30) - 1);
}
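As a standalone illustration (not part of the commit): for logical shifts with C2 <= C1 < XLen, the identity
((x << C1) >> C1) << C2 == (x << C1) >> (C1 - C2)
is what lets the final shl be folded into the srl. A minimal C sketch, using a hypothetical check_identity helper:

#include <assert.h>
#include <stdint.h>

/* Hypothetical helper, not from the patch: verifies that masking off the top
   c1 bits and then shifting left by c2 equals a single srl by c1 - c2 after
   the slli, for logical (unsigned) shifts with c2 <= c1 < 64. */
static void check_identity(uint64_t x, unsigned c1, unsigned c2)
{
    uint64_t masked_then_shifted = ((x << c1) >> c1) << c2;
    uint64_t combined            = (x << c1) >> (c1 - c2);
    assert(masked_then_shifted == combined);
}

int main(void)
{
    /* e.g. the and_0xfff_shl_2 case on RV64: mask 0xfff gives c1 = 52, c2 = 2 */
    check_identity(0x123456789abcdef0ULL, 52, 2);
    check_identity(0xffffffffffffffffULL, 52, 2);
    return 0;
}

For that case the combined form is exactly the slli 52 / srli 50 pair in the new and-shl.ll checks below.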
Added:
llvm/test/CodeGen/RISCV/and-shl.ll
Modified:
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index c5432619a36462..c3922e38729dc3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1026,13 +1026,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
unsigned ShAmt = N1C->getZExtValue();
uint64_t Mask = N0.getConstantOperandVal(1);
- // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
- // 32 leading zeros and C3 trailing zeros.
if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
unsigned XLen = Subtarget->getXLen();
unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
unsigned TrailingZeros = llvm::countr_zero(Mask);
if (TrailingZeros > 0 && LeadingZeros == 32) {
+ // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
+ // where C2 has 32 leading zeros and C3 trailing zeros.
SDNode *SRLIW = CurDAG->getMachineNode(
RISCV::SRLIW, DL, VT, N0->getOperand(0),
CurDAG->getTargetConstant(TrailingZeros, DL, VT));
@@ -1042,6 +1042,25 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, SLLI);
return;
}
+ if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
+ XLen - LeadingZeros > 11 && LeadingZeros != 32) {
+ // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
+ // where C2 has C4 leading zeros and no trailing zeros.
+ // This is profitable if the "and" was to be lowered to
+ // (srli (slli X, C4), C4) and not (andi X, C2).
+ // For "LeadingZeros == 32":
+ // - with Zba it's just (slli.uw X, C)
+ // - without Zba a tablegen pattern applies the very same
+ // transform as we would have done here
+ SDNode *SLLI = CurDAG->getMachineNode(
+ RISCV::SLLI, DL, VT, N0->getOperand(0),
+ CurDAG->getTargetConstant(LeadingZeros, DL, VT));
+ SDNode *SRLI = CurDAG->getMachineNode(
+ RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+ CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
+ ReplaceNode(Node, SRLI);
+ return;
+ }
}
break;
}
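The guard on the new path deserves a note: it only fires when the and would have been lowered to an slli/srli pair anyway, i.e. the mask is a low-bit mask wider than the 11 ones an andi immediate can encode, and the 32-leading-zeros case is left to slli.uw (Zba) or the existing tablegen pattern. A rough C paraphrase of the condition, as a sketch only (the selects_slli_srli helper is hypothetical and omits the ShAmt <= 32 check; the mask is assumed to fit in xlen bits):

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical paraphrase of the new guard above, not the LLVM code itself. */
static bool selects_slli_srli(uint64_t mask, unsigned shamt, unsigned xlen)
{
    if (mask == 0 || (mask & (mask + 1)) != 0)
        return false;               /* not a low-bit mask of the form 2^k - 1 */
    unsigned width = 0;
    for (uint64_t m = mask; m != 0; m >>= 1)
        ++width;                    /* k, the number of one bits in the mask */
    unsigned lz = xlen - width;     /* LeadingZeros, i.e. C4 in the comment */
    return lz > shamt               /* the combined srl amount stays positive */
        && width > 11               /* too wide for a 12-bit andi immediate */
        && lz != 32;                /* 32 leading zeros: slli.uw or tablegen */
}

This matches the new and-shl.ll test: the 0xfff mask (12 ones) takes the slli/srli form, while 0x7ff (11 ones) keeps andi.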
diff --git a/llvm/test/CodeGen/RISCV/and-shl.ll b/llvm/test/CodeGen/RISCV/and-shl.ll
new file mode 100644
index 00000000000000..c3cb5d8e2e37d6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/and-shl.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+
+define i32 @and_0xfff_shl_2(i32 %x) {
+; RV32I-LABEL: and_0xfff_shl_2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 20
+; RV32I-NEXT: srli a0, a0, 18
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: and_0xfff_shl_2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 52
+; RV64I-NEXT: srli a0, a0, 50
+; RV64I-NEXT: ret
+ %a = and i32 %x, 4095
+ %s = shl i32 %a, 2
+ ret i32 %s
+}
+
+define i32 @and_0x7ff_shl_2(i32 %x) {
+; RV32I-LABEL: and_0x7ff_shl_2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a0, a0, 2047
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: and_0x7ff_shl_2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: andi a0, a0, 2047
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: ret
+ %a = and i32 %x, 2047
+ %s = shl i32 %a, 2
+ ret i32 %s
+}
+
+define i64 @and_0xffffffff_shl_2(i64 %x) {
+; RV32I-LABEL: and_0xffffffff_shl_2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a2, a0, 2
+; RV32I-NEXT: srli a1, a0, 30
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: and_0xffffffff_shl_2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 30
+; RV64I-NEXT: ret
+ %a = and i64 %x, 4294967295
+ %s = shl i64 %a, 2
+ ret i64 %s
+}
+
+define i32 @and_0xfff_shl_2_multi_use(i32 %x) {
+; RV32I-LABEL: and_0xfff_shl_2_multi_use:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 20
+; RV32I-NEXT: srli a0, a0, 20
+; RV32I-NEXT: slli a1, a0, 2
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: and_0xfff_shl_2_multi_use:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 52
+; RV64I-NEXT: srli a0, a0, 52
+; RV64I-NEXT: slli a1, a0, 2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+ %a = and i32 %x, 4095
+ %s = shl i32 %a, 2
+ %r = add i32 %a, %s
+ ret i32 %r
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
index 7f4483a8f77d9c..ddcb3c3121bc3d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
@@ -124,42 +124,40 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
; ZVFH32: # %bb.0:
; ZVFH32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFH32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFH32-NEXT: lui a1, 8
; ZVFH32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFH32-NEXT: vmv.x.s a2, v9
-; ZVFH32-NEXT: addi a1, a1, -1
+; ZVFH32-NEXT: vmv.x.s a1, v9
; ZVFH32-NEXT: vslidedown.vi v9, v9, 1
-; ZVFH32-NEXT: vmv.x.s a3, v8
-; ZVFH32-NEXT: and a2, a2, a1
-; ZVFH32-NEXT: vmv.x.s a4, v9
-; ZVFH32-NEXT: and a1, a4, a1
-; ZVFH32-NEXT: slli a4, a3, 17
-; ZVFH32-NEXT: slli a3, a3, 30
-; ZVFH32-NEXT: srli a4, a4, 19
-; ZVFH32-NEXT: slli a1, a1, 15
-; ZVFH32-NEXT: or a2, a2, a3
-; ZVFH32-NEXT: or a1, a2, a1
+; ZVFH32-NEXT: vmv.x.s a2, v8
+; ZVFH32-NEXT: slli a1, a1, 17
+; ZVFH32-NEXT: srli a1, a1, 17
+; ZVFH32-NEXT: slli a3, a2, 30
+; ZVFH32-NEXT: or a1, a1, a3
+; ZVFH32-NEXT: vmv.x.s a3, v9
+; ZVFH32-NEXT: slli a2, a2, 17
+; ZVFH32-NEXT: slli a3, a3, 17
+; ZVFH32-NEXT: srli a2, a2, 19
+; ZVFH32-NEXT: srli a3, a3, 2
+; ZVFH32-NEXT: or a1, a1, a3
; ZVFH32-NEXT: sw a1, 0(a0)
-; ZVFH32-NEXT: sh a4, 4(a0)
+; ZVFH32-NEXT: sh a2, 4(a0)
; ZVFH32-NEXT: ret
;
; ZVFH64-LABEL: fp2si_v3f32_v3i15:
; ZVFH64: # %bb.0:
; ZVFH64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFH64-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFH64-NEXT: lui a1, 8
-; ZVFH64-NEXT: vmv.x.s a2, v9
-; ZVFH64-NEXT: addiw a1, a1, -1
+; ZVFH64-NEXT: vmv.x.s a1, v9
; ZVFH64-NEXT: vslidedown.vi v8, v9, 1
; ZVFH64-NEXT: vslidedown.vi v9, v9, 2
-; ZVFH64-NEXT: and a2, a2, a1
-; ZVFH64-NEXT: vmv.x.s a3, v8
-; ZVFH64-NEXT: and a1, a3, a1
+; ZVFH64-NEXT: slli a1, a1, 49
+; ZVFH64-NEXT: vmv.x.s a2, v8
; ZVFH64-NEXT: vmv.x.s a3, v9
+; ZVFH64-NEXT: srli a1, a1, 49
+; ZVFH64-NEXT: slli a2, a2, 49
; ZVFH64-NEXT: slli a3, a3, 30
-; ZVFH64-NEXT: slli a1, a1, 15
-; ZVFH64-NEXT: or a2, a2, a3
-; ZVFH64-NEXT: or a1, a2, a1
+; ZVFH64-NEXT: srli a2, a2, 34
+; ZVFH64-NEXT: or a1, a1, a3
+; ZVFH64-NEXT: or a1, a1, a2
; ZVFH64-NEXT: slli a2, a1, 19
; ZVFH64-NEXT: srli a2, a2, 51
; ZVFH64-NEXT: sw a1, 0(a0)
@@ -170,42 +168,40 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
; ZVFHMIN32: # %bb.0:
; ZVFHMIN32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN32-NEXT: lui a1, 8
; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN32-NEXT: vmv.x.s a2, v9
-; ZVFHMIN32-NEXT: addi a1, a1, -1
+; ZVFHMIN32-NEXT: vmv.x.s a1, v9
; ZVFHMIN32-NEXT: vslidedown.vi v9, v9, 1
-; ZVFHMIN32-NEXT: vmv.x.s a3, v8
-; ZVFHMIN32-NEXT: and a2, a2, a1
-; ZVFHMIN32-NEXT: vmv.x.s a4, v9
-; ZVFHMIN32-NEXT: and a1, a4, a1
-; ZVFHMIN32-NEXT: slli a4, a3, 17
-; ZVFHMIN32-NEXT: slli a3, a3, 30
-; ZVFHMIN32-NEXT: srli a4, a4, 19
-; ZVFHMIN32-NEXT: slli a1, a1, 15
-; ZVFHMIN32-NEXT: or a2, a2, a3
-; ZVFHMIN32-NEXT: or a1, a2, a1
+; ZVFHMIN32-NEXT: vmv.x.s a2, v8
+; ZVFHMIN32-NEXT: slli a1, a1, 17
+; ZVFHMIN32-NEXT: srli a1, a1, 17
+; ZVFHMIN32-NEXT: slli a3, a2, 30
+; ZVFHMIN32-NEXT: or a1, a1, a3
+; ZVFHMIN32-NEXT: vmv.x.s a3, v9
+; ZVFHMIN32-NEXT: slli a2, a2, 17
+; ZVFHMIN32-NEXT: slli a3, a3, 17
+; ZVFHMIN32-NEXT: srli a2, a2, 19
+; ZVFHMIN32-NEXT: srli a3, a3, 2
+; ZVFHMIN32-NEXT: or a1, a1, a3
; ZVFHMIN32-NEXT: sw a1, 0(a0)
-; ZVFHMIN32-NEXT: sh a4, 4(a0)
+; ZVFHMIN32-NEXT: sh a2, 4(a0)
; ZVFHMIN32-NEXT: ret
;
; ZVFHMIN64-LABEL: fp2si_v3f32_v3i15:
; ZVFHMIN64: # %bb.0:
; ZVFHMIN64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN64-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN64-NEXT: lui a1, 8
-; ZVFHMIN64-NEXT: vmv.x.s a2, v9
-; ZVFHMIN64-NEXT: addiw a1, a1, -1
+; ZVFHMIN64-NEXT: vmv.x.s a1, v9
; ZVFHMIN64-NEXT: vslidedown.vi v8, v9, 1
; ZVFHMIN64-NEXT: vslidedown.vi v9, v9, 2
-; ZVFHMIN64-NEXT: and a2, a2, a1
-; ZVFHMIN64-NEXT: vmv.x.s a3, v8
-; ZVFHMIN64-NEXT: and a1, a3, a1
+; ZVFHMIN64-NEXT: slli a1, a1, 49
+; ZVFHMIN64-NEXT: vmv.x.s a2, v8
; ZVFHMIN64-NEXT: vmv.x.s a3, v9
+; ZVFHMIN64-NEXT: srli a1, a1, 49
+; ZVFHMIN64-NEXT: slli a2, a2, 49
; ZVFHMIN64-NEXT: slli a3, a3, 30
-; ZVFHMIN64-NEXT: slli a1, a1, 15
-; ZVFHMIN64-NEXT: or a2, a2, a3
-; ZVFHMIN64-NEXT: or a1, a2, a1
+; ZVFHMIN64-NEXT: srli a2, a2, 34
+; ZVFHMIN64-NEXT: or a1, a1, a3
+; ZVFHMIN64-NEXT: or a1, a1, a2
; ZVFHMIN64-NEXT: slli a2, a1, 19
; ZVFHMIN64-NEXT: srli a2, a2, 51
; ZVFHMIN64-NEXT: sw a1, 0(a0)
@@ -221,42 +217,40 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
; ZVFH32: # %bb.0:
; ZVFH32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFH32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFH32-NEXT: lui a1, 16
; ZVFH32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFH32-NEXT: vmv.x.s a2, v9
-; ZVFH32-NEXT: addi a1, a1, -1
+; ZVFH32-NEXT: vmv.x.s a1, v9
; ZVFH32-NEXT: vslidedown.vi v9, v9, 1
-; ZVFH32-NEXT: vmv.x.s a3, v8
-; ZVFH32-NEXT: and a2, a2, a1
-; ZVFH32-NEXT: vmv.x.s a4, v9
-; ZVFH32-NEXT: and a1, a4, a1
-; ZVFH32-NEXT: slli a4, a3, 17
-; ZVFH32-NEXT: slli a3, a3, 30
-; ZVFH32-NEXT: srli a4, a4, 19
-; ZVFH32-NEXT: slli a1, a1, 15
-; ZVFH32-NEXT: or a2, a2, a3
-; ZVFH32-NEXT: or a1, a2, a1
+; ZVFH32-NEXT: vmv.x.s a2, v8
+; ZVFH32-NEXT: slli a1, a1, 16
+; ZVFH32-NEXT: srli a1, a1, 16
+; ZVFH32-NEXT: slli a3, a2, 30
+; ZVFH32-NEXT: or a1, a1, a3
+; ZVFH32-NEXT: vmv.x.s a3, v9
+; ZVFH32-NEXT: slli a2, a2, 17
+; ZVFH32-NEXT: slli a3, a3, 16
+; ZVFH32-NEXT: srli a2, a2, 19
+; ZVFH32-NEXT: srli a3, a3, 1
+; ZVFH32-NEXT: or a1, a1, a3
; ZVFH32-NEXT: sw a1, 0(a0)
-; ZVFH32-NEXT: sh a4, 4(a0)
+; ZVFH32-NEXT: sh a2, 4(a0)
; ZVFH32-NEXT: ret
;
; ZVFH64-LABEL: fp2ui_v3f32_v3i15:
; ZVFH64: # %bb.0:
; ZVFH64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFH64-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFH64-NEXT: lui a1, 16
-; ZVFH64-NEXT: vmv.x.s a2, v9
-; ZVFH64-NEXT: addiw a1, a1, -1
+; ZVFH64-NEXT: vmv.x.s a1, v9
; ZVFH64-NEXT: vslidedown.vi v8, v9, 1
; ZVFH64-NEXT: vslidedown.vi v9, v9, 2
-; ZVFH64-NEXT: and a2, a2, a1
-; ZVFH64-NEXT: vmv.x.s a3, v8
-; ZVFH64-NEXT: and a1, a3, a1
+; ZVFH64-NEXT: slli a1, a1, 48
+; ZVFH64-NEXT: vmv.x.s a2, v8
; ZVFH64-NEXT: vmv.x.s a3, v9
+; ZVFH64-NEXT: srli a1, a1, 48
+; ZVFH64-NEXT: slli a2, a2, 48
; ZVFH64-NEXT: slli a3, a3, 30
-; ZVFH64-NEXT: slli a1, a1, 15
-; ZVFH64-NEXT: or a2, a2, a3
-; ZVFH64-NEXT: or a1, a2, a1
+; ZVFH64-NEXT: srli a2, a2, 33
+; ZVFH64-NEXT: or a1, a1, a3
+; ZVFH64-NEXT: or a1, a1, a2
; ZVFH64-NEXT: slli a2, a1, 19
; ZVFH64-NEXT: srli a2, a2, 51
; ZVFH64-NEXT: sw a1, 0(a0)
@@ -267,42 +261,40 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
; ZVFHMIN32: # %bb.0:
; ZVFHMIN32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN32-NEXT: lui a1, 16
; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN32-NEXT: vmv.x.s a2, v9
-; ZVFHMIN32-NEXT: addi a1, a1, -1
+; ZVFHMIN32-NEXT: vmv.x.s a1, v9
; ZVFHMIN32-NEXT: vslidedown.vi v9, v9, 1
-; ZVFHMIN32-NEXT: vmv.x.s a3, v8
-; ZVFHMIN32-NEXT: and a2, a2, a1
-; ZVFHMIN32-NEXT: vmv.x.s a4, v9
-; ZVFHMIN32-NEXT: and a1, a4, a1
-; ZVFHMIN32-NEXT: slli a4, a3, 17
-; ZVFHMIN32-NEXT: slli a3, a3, 30
-; ZVFHMIN32-NEXT: srli a4, a4, 19
-; ZVFHMIN32-NEXT: slli a1, a1, 15
-; ZVFHMIN32-NEXT: or a2, a2, a3
-; ZVFHMIN32-NEXT: or a1, a2, a1
+; ZVFHMIN32-NEXT: vmv.x.s a2, v8
+; ZVFHMIN32-NEXT: slli a1, a1, 16
+; ZVFHMIN32-NEXT: srli a1, a1, 16
+; ZVFHMIN32-NEXT: slli a3, a2, 30
+; ZVFHMIN32-NEXT: or a1, a1, a3
+; ZVFHMIN32-NEXT: vmv.x.s a3, v9
+; ZVFHMIN32-NEXT: slli a2, a2, 17
+; ZVFHMIN32-NEXT: slli a3, a3, 16
+; ZVFHMIN32-NEXT: srli a2, a2, 19
+; ZVFHMIN32-NEXT: srli a3, a3, 1
+; ZVFHMIN32-NEXT: or a1, a1, a3
; ZVFHMIN32-NEXT: sw a1, 0(a0)
-; ZVFHMIN32-NEXT: sh a4, 4(a0)
+; ZVFHMIN32-NEXT: sh a2, 4(a0)
; ZVFHMIN32-NEXT: ret
;
; ZVFHMIN64-LABEL: fp2ui_v3f32_v3i15:
; ZVFHMIN64: # %bb.0:
; ZVFHMIN64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN64-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN64-NEXT: lui a1, 16
-; ZVFHMIN64-NEXT: vmv.x.s a2, v9
-; ZVFHMIN64-NEXT: addiw a1, a1, -1
+; ZVFHMIN64-NEXT: vmv.x.s a1, v9
; ZVFHMIN64-NEXT: vslidedown.vi v8, v9, 1
; ZVFHMIN64-NEXT: vslidedown.vi v9, v9, 2
-; ZVFHMIN64-NEXT: and a2, a2, a1
-; ZVFHMIN64-NEXT: vmv.x.s a3, v8
-; ZVFHMIN64-NEXT: and a1, a3, a1
+; ZVFHMIN64-NEXT: slli a1, a1, 48
+; ZVFHMIN64-NEXT: vmv.x.s a2, v8
; ZVFHMIN64-NEXT: vmv.x.s a3, v9
+; ZVFHMIN64-NEXT: srli a1, a1, 48
+; ZVFHMIN64-NEXT: slli a2, a2, 48
; ZVFHMIN64-NEXT: slli a3, a3, 30
-; ZVFHMIN64-NEXT: slli a1, a1, 15
-; ZVFHMIN64-NEXT: or a2, a2, a3
-; ZVFHMIN64-NEXT: or a1, a2, a1
+; ZVFHMIN64-NEXT: srli a2, a2, 33
+; ZVFHMIN64-NEXT: or a1, a1, a3
+; ZVFHMIN64-NEXT: or a1, a1, a2
; ZVFHMIN64-NEXT: slli a2, a1, 19
; ZVFHMIN64-NEXT: srli a2, a2, 51
; ZVFHMIN64-NEXT: sw a1, 0(a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index e9fd0a19e3eb66..139f7b4e6a0c80 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -3296,11 +3296,11 @@ define <4 x i16> @buildvec_v4i16_pack(i16 %e1, i16 %e2, i16 %e3, i16 %e4) {
; RVA22U64-LABEL: buildvec_v4i16_pack:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: slli a3, a3, 48
-; RVA22U64-NEXT: zext.h a2, a2
+; RVA22U64-NEXT: slli a2, a2, 48
; RVA22U64-NEXT: zext.h a0, a0
-; RVA22U64-NEXT: zext.h a1, a1
-; RVA22U64-NEXT: slli a2, a2, 32
-; RVA22U64-NEXT: slli a1, a1, 16
+; RVA22U64-NEXT: slli a1, a1, 48
+; RVA22U64-NEXT: srli a2, a2, 16
+; RVA22U64-NEXT: srli a1, a1, 32
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: or a0, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 141d54cf585f28..c6e12c52122d27 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -3205,88 +3205,86 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i32:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: lui a1, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v0
-; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: addiw a1, a1, -1
-; RV64ZVE32F-NEXT: beqz a3, .LBB40_2
+; RV64ZVE32F-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-NEXT: andi a2, a1, 1
+; RV64ZVE32F-NEXT: beqz a2, .LBB40_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: lw a3, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v10, a3
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB40_2: # %else
-; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB40_4
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB40_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: lw a3, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a3
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB40_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a3, a2, 4
+; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB40_14
+; RV64ZVE32F-NEXT: bnez a2, .LBB40_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB40_15
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: bnez a2, .LBB40_15
; RV64ZVE32F-NEXT: .LBB40_6: # %else8
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB40_16
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: bnez a2, .LBB40_16
; RV64ZVE32F-NEXT: .LBB40_7: # %else11
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB40_9
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB40_9
; RV64ZVE32F-NEXT: .LBB40_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: lw a3, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a3
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
; RV64ZVE32F-NEXT: .LBB40_9: # %else14
-; RV64ZVE32F-NEXT: andi a3, a2, 64
+; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB40_11
+; RV64ZVE32F-NEXT: beqz a2, .LBB40_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: lw a3, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a3
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: .LBB40_11: # %else17
-; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB40_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB40_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: and a1, a2, a1
-; RV64ZVE32F-NEXT: slli a1, a1, 2
+; RV64ZVE32F-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-NEXT: slli a1, a1, 48
+; RV64ZVE32F-NEXT: srli a1, a1, 46
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
@@ -3298,44 +3296,44 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB40_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: lw a3, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a3
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB40_6
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB40_6
; RV64ZVE32F-NEXT: .LBB40_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: lw a3, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a3
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB40_7
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: beqz a2, .LBB40_7
; RV64ZVE32F-NEXT: .LBB40_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: lw a3, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a3
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB40_8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: bnez a2, .LBB40_8
; RV64ZVE32F-NEXT: j .LBB40_9
%eidxs = zext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
@@ -5643,124 +5641,122 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: lui a5, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a6, v0
-; RV64ZVE32F-NEXT: andi a3, a6, 1
-; RV64ZVE32F-NEXT: addiw a5, a5, -1
+; RV64ZVE32F-NEXT: vmv.x.s a5, v0
+; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB53_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a5
-; RV64ZVE32F-NEXT: slli a3, a3, 3
+; RV64ZVE32F-NEXT: slli a3, a3, 48
+; RV64ZVE32F-NEXT: srli a3, a3, 45
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
-; RV64ZVE32F-NEXT: andi a4, a6, 2
+; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB53_4
; RV64ZVE32F-NEXT: .LBB53_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB53_5
; RV64ZVE32F-NEXT: .LBB53_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: andi a4, a6, 2
+; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB53_2
; RV64ZVE32F-NEXT: .LBB53_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
-; RV64ZVE32F-NEXT: and a4, a4, a5
-; RV64ZVE32F-NEXT: slli a4, a4, 3
+; RV64ZVE32F-NEXT: slli a4, a4, 48
+; RV64ZVE32F-NEXT: srli a4, a4, 45
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB53_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a7, a6, 4
+; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a7, .LBB53_10
+; RV64ZVE32F-NEXT: beqz a6, .LBB53_10
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a7, v8
-; RV64ZVE32F-NEXT: and a7, a7, a5
-; RV64ZVE32F-NEXT: slli a7, a7, 3
-; RV64ZVE32F-NEXT: add a7, a1, a7
-; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: andi t0, a6, 8
-; RV64ZVE32F-NEXT: bnez t0, .LBB53_11
+; RV64ZVE32F-NEXT: vmv.x.s a6, v8
+; RV64ZVE32F-NEXT: slli a6, a6, 48
+; RV64ZVE32F-NEXT: srli a6, a6, 45
+; RV64ZVE32F-NEXT: add a6, a1, a6
+; RV64ZVE32F-NEXT: ld a6, 0(a6)
+; RV64ZVE32F-NEXT: andi a7, a5, 8
+; RV64ZVE32F-NEXT: bnez a7, .LBB53_11
; RV64ZVE32F-NEXT: .LBB53_7:
-; RV64ZVE32F-NEXT: ld t0, 24(a2)
-; RV64ZVE32F-NEXT: andi t1, a6, 16
-; RV64ZVE32F-NEXT: bnez t1, .LBB53_12
+; RV64ZVE32F-NEXT: ld a7, 24(a2)
+; RV64ZVE32F-NEXT: andi t0, a5, 16
+; RV64ZVE32F-NEXT: bnez t0, .LBB53_12
; RV64ZVE32F-NEXT: .LBB53_8:
-; RV64ZVE32F-NEXT: ld t1, 32(a2)
-; RV64ZVE32F-NEXT: andi t2, a6, 32
-; RV64ZVE32F-NEXT: bnez t2, .LBB53_13
+; RV64ZVE32F-NEXT: ld t0, 32(a2)
+; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: bnez t1, .LBB53_13
; RV64ZVE32F-NEXT: .LBB53_9:
-; RV64ZVE32F-NEXT: ld t2, 40(a2)
+; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB53_14
; RV64ZVE32F-NEXT: .LBB53_10:
-; RV64ZVE32F-NEXT: ld a7, 16(a2)
-; RV64ZVE32F-NEXT: andi t0, a6, 8
-; RV64ZVE32F-NEXT: beqz t0, .LBB53_7
+; RV64ZVE32F-NEXT: ld a6, 16(a2)
+; RV64ZVE32F-NEXT: andi a7, a5, 8
+; RV64ZVE32F-NEXT: beqz a7, .LBB53_7
; RV64ZVE32F-NEXT: .LBB53_11: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s t0, v8
-; RV64ZVE32F-NEXT: and t0, t0, a5
-; RV64ZVE32F-NEXT: slli t0, t0, 3
+; RV64ZVE32F-NEXT: vmv.x.s a7, v8
+; RV64ZVE32F-NEXT: slli a7, a7, 48
+; RV64ZVE32F-NEXT: srli a7, a7, 45
+; RV64ZVE32F-NEXT: add a7, a1, a7
+; RV64ZVE32F-NEXT: ld a7, 0(a7)
+; RV64ZVE32F-NEXT: andi t0, a5, 16
+; RV64ZVE32F-NEXT: beqz t0, .LBB53_8
+; RV64ZVE32F-NEXT: .LBB53_12: # %cond.load10
+; RV64ZVE32F-NEXT: vmv.x.s t0, v9
+; RV64ZVE32F-NEXT: slli t0, t0, 48
+; RV64ZVE32F-NEXT: srli t0, t0, 45
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: andi t1, a6, 16
-; RV64ZVE32F-NEXT: beqz t1, .LBB53_8
-; RV64ZVE32F-NEXT: .LBB53_12: # %cond.load10
-; RV64ZVE32F-NEXT: vmv.x.s t1, v9
-; RV64ZVE32F-NEXT: and t1, t1, a5
-; RV64ZVE32F-NEXT: slli t1, t1, 3
-; RV64ZVE32F-NEXT: add t1, a1, t1
-; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: andi t2, a6, 32
-; RV64ZVE32F-NEXT: beqz t2, .LBB53_9
+; RV64ZVE32F-NEXT: andi t1, a5, 32
+; RV64ZVE32F-NEXT: beqz t1, .LBB53_9
; RV64ZVE32F-NEXT: .LBB53_13: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s t2, v8
-; RV64ZVE32F-NEXT: and t2, t2, a5
-; RV64ZVE32F-NEXT: slli t2, t2, 3
-; RV64ZVE32F-NEXT: add t2, a1, t2
-; RV64ZVE32F-NEXT: ld t2, 0(t2)
+; RV64ZVE32F-NEXT: vmv.x.s t1, v8
+; RV64ZVE32F-NEXT: slli t1, t1, 48
+; RV64ZVE32F-NEXT: srli t1, t1, 45
+; RV64ZVE32F-NEXT: add t1, a1, t1
+; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB53_14: # %else14
-; RV64ZVE32F-NEXT: andi t3, a6, 64
+; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: beqz t3, .LBB53_17
+; RV64ZVE32F-NEXT: beqz t2, .LBB53_17
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
-; RV64ZVE32F-NEXT: vmv.x.s t3, v8
-; RV64ZVE32F-NEXT: and t3, t3, a5
-; RV64ZVE32F-NEXT: slli t3, t3, 3
-; RV64ZVE32F-NEXT: add t3, a1, t3
-; RV64ZVE32F-NEXT: ld t3, 0(t3)
-; RV64ZVE32F-NEXT: andi a6, a6, -128
-; RV64ZVE32F-NEXT: bnez a6, .LBB53_18
+; RV64ZVE32F-NEXT: vmv.x.s t2, v8
+; RV64ZVE32F-NEXT: slli t2, t2, 48
+; RV64ZVE32F-NEXT: srli t2, t2, 45
+; RV64ZVE32F-NEXT: add t2, a1, t2
+; RV64ZVE32F-NEXT: ld t2, 0(t2)
+; RV64ZVE32F-NEXT: andi a5, a5, -128
+; RV64ZVE32F-NEXT: bnez a5, .LBB53_18
; RV64ZVE32F-NEXT: .LBB53_16:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB53_19
; RV64ZVE32F-NEXT: .LBB53_17:
-; RV64ZVE32F-NEXT: ld t3, 48(a2)
-; RV64ZVE32F-NEXT: andi a6, a6, -128
-; RV64ZVE32F-NEXT: beqz a6, .LBB53_16
+; RV64ZVE32F-NEXT: ld t2, 48(a2)
+; RV64ZVE32F-NEXT: andi a5, a5, -128
+; RV64ZVE32F-NEXT: beqz a5, .LBB53_16
; RV64ZVE32F-NEXT: .LBB53_18: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: and a2, a2, a5
-; RV64ZVE32F-NEXT: slli a2, a2, 3
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 45
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB53_19: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
-; RV64ZVE32F-NEXT: sd a7, 16(a0)
-; RV64ZVE32F-NEXT: sd t0, 24(a0)
-; RV64ZVE32F-NEXT: sd t1, 32(a0)
-; RV64ZVE32F-NEXT: sd t2, 40(a0)
-; RV64ZVE32F-NEXT: sd t3, 48(a0)
+; RV64ZVE32F-NEXT: sd a6, 16(a0)
+; RV64ZVE32F-NEXT: sd a7, 24(a0)
+; RV64ZVE32F-NEXT: sd t0, 32(a0)
+; RV64ZVE32F-NEXT: sd t1, 40(a0)
+; RV64ZVE32F-NEXT: sd t2, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i16> %idxs to <8 x i64>
@@ -10511,32 +10507,30 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f32:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: lui a1, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v0
-; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: addiw a1, a1, -1
-; RV64ZVE32F-NEXT: beqz a3, .LBB89_2
+; RV64ZVE32F-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-NEXT: andi a2, a1, 1
+; RV64ZVE32F-NEXT: beqz a2, .LBB89_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: flw fa5, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB89_2: # %else
-; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB89_4
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB89_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: flw fa5, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
@@ -10544,55 +10538,55 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: .LBB89_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a3, a2, 4
+; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB89_14
+; RV64ZVE32F-NEXT: bnez a2, .LBB89_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB89_15
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: bnez a2, .LBB89_15
; RV64ZVE32F-NEXT: .LBB89_6: # %else8
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB89_16
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: bnez a2, .LBB89_16
; RV64ZVE32F-NEXT: .LBB89_7: # %else11
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB89_9
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB89_9
; RV64ZVE32F-NEXT: .LBB89_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: flw fa5, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
; RV64ZVE32F-NEXT: .LBB89_9: # %else14
-; RV64ZVE32F-NEXT: andi a3, a2, 64
+; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB89_11
+; RV64ZVE32F-NEXT: beqz a2, .LBB89_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: flw fa5, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: .LBB89_11: # %else17
-; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB89_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB89_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: and a1, a2, a1
-; RV64ZVE32F-NEXT: slli a1, a1, 2
+; RV64ZVE32F-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-NEXT: slli a1, a1, 48
+; RV64ZVE32F-NEXT: srli a1, a1, 46
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
@@ -10604,44 +10598,44 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB89_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: flw fa5, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB89_6
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB89_6
; RV64ZVE32F-NEXT: .LBB89_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: flw fa5, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB89_7
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: beqz a2, .LBB89_7
; RV64ZVE32F-NEXT: .LBB89_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: flw fa5, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB89_8
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: bnez a2, .LBB89_8
; RV64ZVE32F-NEXT: j .LBB89_9
%eidxs = zext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
@@ -12482,71 +12476,69 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: lui a2, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v0
-; RV64ZVE32F-NEXT: andi a4, a3, 1
-; RV64ZVE32F-NEXT: addiw a2, a2, -1
-; RV64ZVE32F-NEXT: beqz a4, .LBB102_2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v0
+; RV64ZVE32F-NEXT: andi a3, a2, 1
+; RV64ZVE32F-NEXT: beqz a3, .LBB102_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a4, v8
-; RV64ZVE32F-NEXT: and a4, a4, a2
-; RV64ZVE32F-NEXT: slli a4, a4, 3
-; RV64ZVE32F-NEXT: add a4, a1, a4
-; RV64ZVE32F-NEXT: fld fa0, 0(a4)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v8
+; RV64ZVE32F-NEXT: slli a3, a3, 48
+; RV64ZVE32F-NEXT: srli a3, a3, 45
+; RV64ZVE32F-NEXT: add a3, a1, a3
+; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: .LBB102_2: # %else
-; RV64ZVE32F-NEXT: andi a4, a3, 2
-; RV64ZVE32F-NEXT: beqz a4, .LBB102_4
+; RV64ZVE32F-NEXT: andi a3, a2, 2
+; RV64ZVE32F-NEXT: beqz a3, .LBB102_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a4, v9
-; RV64ZVE32F-NEXT: and a4, a4, a2
-; RV64ZVE32F-NEXT: slli a4, a4, 3
-; RV64ZVE32F-NEXT: add a4, a1, a4
-; RV64ZVE32F-NEXT: fld fa1, 0(a4)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: slli a3, a3, 48
+; RV64ZVE32F-NEXT: srli a3, a3, 45
+; RV64ZVE32F-NEXT: add a3, a1, a3
+; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: .LBB102_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a4, a3, 4
+; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB102_14
+; RV64ZVE32F-NEXT: bnez a3, .LBB102_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
-; RV64ZVE32F-NEXT: andi a4, a3, 8
-; RV64ZVE32F-NEXT: bnez a4, .LBB102_15
+; RV64ZVE32F-NEXT: andi a3, a2, 8
+; RV64ZVE32F-NEXT: bnez a3, .LBB102_15
; RV64ZVE32F-NEXT: .LBB102_6: # %else8
-; RV64ZVE32F-NEXT: andi a4, a3, 16
-; RV64ZVE32F-NEXT: bnez a4, .LBB102_16
+; RV64ZVE32F-NEXT: andi a3, a2, 16
+; RV64ZVE32F-NEXT: bnez a3, .LBB102_16
; RV64ZVE32F-NEXT: .LBB102_7: # %else11
-; RV64ZVE32F-NEXT: andi a4, a3, 32
-; RV64ZVE32F-NEXT: beqz a4, .LBB102_9
+; RV64ZVE32F-NEXT: andi a3, a2, 32
+; RV64ZVE32F-NEXT: beqz a3, .LBB102_9
; RV64ZVE32F-NEXT: .LBB102_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a4, v8
-; RV64ZVE32F-NEXT: and a4, a4, a2
-; RV64ZVE32F-NEXT: slli a4, a4, 3
-; RV64ZVE32F-NEXT: add a4, a1, a4
-; RV64ZVE32F-NEXT: fld fa5, 0(a4)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v8
+; RV64ZVE32F-NEXT: slli a3, a3, 48
+; RV64ZVE32F-NEXT: srli a3, a3, 45
+; RV64ZVE32F-NEXT: add a3, a1, a3
+; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB102_9: # %else14
-; RV64ZVE32F-NEXT: andi a4, a3, 64
+; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: beqz a4, .LBB102_11
+; RV64ZVE32F-NEXT: beqz a3, .LBB102_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
-; RV64ZVE32F-NEXT: vmv.x.s a4, v8
-; RV64ZVE32F-NEXT: and a4, a4, a2
-; RV64ZVE32F-NEXT: slli a4, a4, 3
-; RV64ZVE32F-NEXT: add a4, a1, a4
-; RV64ZVE32F-NEXT: fld fa6, 0(a4)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v8
+; RV64ZVE32F-NEXT: slli a3, a3, 48
+; RV64ZVE32F-NEXT: srli a3, a3, 45
+; RV64ZVE32F-NEXT: add a3, a1, a3
+; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: .LBB102_11: # %else17
-; RV64ZVE32F-NEXT: andi a3, a3, -128
-; RV64ZVE32F-NEXT: beqz a3, .LBB102_13
+; RV64ZVE32F-NEXT: andi a2, a2, -128
+; RV64ZVE32F-NEXT: beqz a2, .LBB102_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a2, a3, a2
-; RV64ZVE32F-NEXT: slli a2, a2, 3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 45
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB102_13: # %else20
@@ -12560,30 +12552,30 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB102_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a4, v8
-; RV64ZVE32F-NEXT: and a4, a4, a2
-; RV64ZVE32F-NEXT: slli a4, a4, 3
-; RV64ZVE32F-NEXT: add a4, a1, a4
-; RV64ZVE32F-NEXT: fld fa2, 0(a4)
-; RV64ZVE32F-NEXT: andi a4, a3, 8
-; RV64ZVE32F-NEXT: beqz a4, .LBB102_6
+; RV64ZVE32F-NEXT: vmv.x.s a3, v8
+; RV64ZVE32F-NEXT: slli a3, a3, 48
+; RV64ZVE32F-NEXT: srli a3, a3, 45
+; RV64ZVE32F-NEXT: add a3, a1, a3
+; RV64ZVE32F-NEXT: fld fa2, 0(a3)
+; RV64ZVE32F-NEXT: andi a3, a2, 8
+; RV64ZVE32F-NEXT: beqz a3, .LBB102_6
; RV64ZVE32F-NEXT: .LBB102_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a4, v8
-; RV64ZVE32F-NEXT: and a4, a4, a2
-; RV64ZVE32F-NEXT: slli a4, a4, 3
-; RV64ZVE32F-NEXT: add a4, a1, a4
-; RV64ZVE32F-NEXT: fld fa3, 0(a4)
-; RV64ZVE32F-NEXT: andi a4, a3, 16
-; RV64ZVE32F-NEXT: beqz a4, .LBB102_7
+; RV64ZVE32F-NEXT: vmv.x.s a3, v8
+; RV64ZVE32F-NEXT: slli a3, a3, 48
+; RV64ZVE32F-NEXT: srli a3, a3, 45
+; RV64ZVE32F-NEXT: add a3, a1, a3
+; RV64ZVE32F-NEXT: fld fa3, 0(a3)
+; RV64ZVE32F-NEXT: andi a3, a2, 16
+; RV64ZVE32F-NEXT: beqz a3, .LBB102_7
; RV64ZVE32F-NEXT: .LBB102_16: # %cond.load10
-; RV64ZVE32F-NEXT: vmv.x.s a4, v9
-; RV64ZVE32F-NEXT: and a4, a4, a2
-; RV64ZVE32F-NEXT: slli a4, a4, 3
-; RV64ZVE32F-NEXT: add a4, a1, a4
-; RV64ZVE32F-NEXT: fld fa4, 0(a4)
-; RV64ZVE32F-NEXT: andi a4, a3, 32
-; RV64ZVE32F-NEXT: bnez a4, .LBB102_8
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: slli a3, a3, 48
+; RV64ZVE32F-NEXT: srli a3, a3, 45
+; RV64ZVE32F-NEXT: add a3, a1, a3
+; RV64ZVE32F-NEXT: fld fa4, 0(a3)
+; RV64ZVE32F-NEXT: andi a3, a2, 32
+; RV64ZVE32F-NEXT: bnez a3, .LBB102_8
; RV64ZVE32F-NEXT: j .LBB102_9
%eidxs = zext <8 x i16> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 575a757149ebba..7ec47269257048 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -2588,123 +2588,121 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i32:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: lui a1, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v0
-; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: addiw a1, a1, -1
-; RV64ZVE32F-NEXT: beqz a3, .LBB34_2
+; RV64ZVE32F-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-NEXT: andi a2, a1, 1
+; RV64ZVE32F-NEXT: beqz a2, .LBB34_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v10
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v8, (a3)
+; RV64ZVE32F-NEXT: vse32.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB34_2: # %else
-; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB34_4
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB34_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v11
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
-; RV64ZVE32F-NEXT: vse32.v v11, (a3)
+; RV64ZVE32F-NEXT: vse32.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB34_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
-; RV64ZVE32F-NEXT: andi a3, a2, 4
+; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB34_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB34_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB34_13
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: bnez a2, .LBB34_13
; RV64ZVE32F-NEXT: .LBB34_6: # %else6
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB34_14
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: bnez a2, .LBB34_14
; RV64ZVE32F-NEXT: .LBB34_7: # %else8
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB34_9
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB34_9
; RV64ZVE32F-NEXT: .LBB34_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v10
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a3)
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB34_9: # %else10
-; RV64ZVE32F-NEXT: andi a3, a2, 64
+; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB34_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB34_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
-; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: bnez a2, .LBB34_16
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB34_16
; RV64ZVE32F-NEXT: .LBB34_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB34_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a3, v10
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB34_6
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB34_6
; RV64ZVE32F-NEXT: .LBB34_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v10
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
-; RV64ZVE32F-NEXT: vse32.v v10, (a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB34_7
+; RV64ZVE32F-NEXT: vse32.v v10, (a2)
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: beqz a2, .LBB34_7
; RV64ZVE32F-NEXT: .LBB34_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v11
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB34_8
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: bnez a2, .LBB34_8
; RV64ZVE32F-NEXT: j .LBB34_9
; RV64ZVE32F-NEXT: .LBB34_15: # %cond.store11
-; RV64ZVE32F-NEXT: vmv.x.s a3, v10
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a3)
-; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB34_11
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB34_11
; RV64ZVE32F-NEXT: .LBB34_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: and a1, a2, a1
-; RV64ZVE32F-NEXT: slli a1, a1, 2
+; RV64ZVE32F-NEXT: vmv.x.s a1, v10
+; RV64ZVE32F-NEXT: slli a1, a1, 48
+; RV64ZVE32F-NEXT: srli a1, a1, 46
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -4794,109 +4792,107 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a5, 40(a0)
+; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
; RV64ZVE32F-NEXT: ld a2, 56(a0)
-; RV64ZVE32F-NEXT: ld t2, 8(a0)
-; RV64ZVE32F-NEXT: ld t1, 16(a0)
-; RV64ZVE32F-NEXT: ld t0, 24(a0)
-; RV64ZVE32F-NEXT: ld a7, 32(a0)
-; RV64ZVE32F-NEXT: lui a4, 16
+; RV64ZVE32F-NEXT: ld t1, 8(a0)
+; RV64ZVE32F-NEXT: ld t0, 16(a0)
+; RV64ZVE32F-NEXT: ld a7, 24(a0)
+; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a6, v0
-; RV64ZVE32F-NEXT: andi t3, a6, 1
-; RV64ZVE32F-NEXT: addiw a4, a4, -1
-; RV64ZVE32F-NEXT: beqz t3, .LBB47_2
+; RV64ZVE32F-NEXT: vmv.x.s a5, v0
+; RV64ZVE32F-NEXT: andi t2, a5, 1
+; RV64ZVE32F-NEXT: beqz t2, .LBB47_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s t3, v8
-; RV64ZVE32F-NEXT: and t3, t3, a4
-; RV64ZVE32F-NEXT: slli t3, t3, 3
-; RV64ZVE32F-NEXT: add t3, a1, t3
-; RV64ZVE32F-NEXT: sd a0, 0(t3)
+; RV64ZVE32F-NEXT: vmv.x.s t2, v8
+; RV64ZVE32F-NEXT: slli t2, t2, 48
+; RV64ZVE32F-NEXT: srli t2, t2, 45
+; RV64ZVE32F-NEXT: add t2, a1, t2
+; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB47_2: # %else
-; RV64ZVE32F-NEXT: andi a0, a6, 2
+; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB47_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
-; RV64ZVE32F-NEXT: and a0, a0, a4
-; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: slli a0, a0, 48
+; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd t2, 0(a0)
+; RV64ZVE32F-NEXT: sd t1, 0(a0)
; RV64ZVE32F-NEXT: .LBB47_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a0, a6, 4
+; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB47_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
-; RV64ZVE32F-NEXT: andi a0, a6, 8
+; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB47_13
; RV64ZVE32F-NEXT: .LBB47_6: # %else6
-; RV64ZVE32F-NEXT: andi a0, a6, 16
+; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB47_14
; RV64ZVE32F-NEXT: .LBB47_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a6, 32
+; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB47_9
; RV64ZVE32F-NEXT: .LBB47_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: and a0, a0, a4
-; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: slli a0, a0, 48
+; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a5, 0(a0)
+; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB47_9: # %else10
-; RV64ZVE32F-NEXT: andi a0, a6, 64
+; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB47_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
-; RV64ZVE32F-NEXT: andi a0, a6, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB47_16
; RV64ZVE32F-NEXT: .LBB47_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB47_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: and a0, a0, a4
-; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: slli a0, a0, 48
+; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd t1, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a6, 8
+; RV64ZVE32F-NEXT: sd t0, 0(a0)
+; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB47_6
; RV64ZVE32F-NEXT: .LBB47_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: and a0, a0, a4
-; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: slli a0, a0, 48
+; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a6, 16
+; RV64ZVE32F-NEXT: sd a7, 0(a0)
+; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB47_7
; RV64ZVE32F-NEXT: .LBB47_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
-; RV64ZVE32F-NEXT: and a0, a0, a4
-; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: slli a0, a0, 48
+; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a6, 32
+; RV64ZVE32F-NEXT: sd a6, 0(a0)
+; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB47_8
; RV64ZVE32F-NEXT: j .LBB47_9
; RV64ZVE32F-NEXT: .LBB47_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: and a0, a0, a4
-; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: slli a0, a0, 48
+; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a6, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB47_11
; RV64ZVE32F-NEXT: .LBB47_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: and a0, a0, a4
-; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: slli a0, a0, 48
+; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a2, 0(a0)
; RV64ZVE32F-NEXT: ret
@@ -9463,123 +9459,121 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f32:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: lui a1, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v0
-; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: addiw a1, a1, -1
-; RV64ZVE32F-NEXT: beqz a3, .LBB83_2
+; RV64ZVE32F-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-NEXT: andi a2, a1, 1
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v10
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v8, (a3)
+; RV64ZVE32F-NEXT: vse32.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB83_2: # %else
-; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB83_4
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v11
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
-; RV64ZVE32F-NEXT: vse32.v v11, (a3)
+; RV64ZVE32F-NEXT: vse32.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB83_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
-; RV64ZVE32F-NEXT: andi a3, a2, 4
+; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB83_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB83_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB83_13
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: bnez a2, .LBB83_13
; RV64ZVE32F-NEXT: .LBB83_6: # %else6
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB83_14
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: bnez a2, .LBB83_14
; RV64ZVE32F-NEXT: .LBB83_7: # %else8
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB83_9
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_9
; RV64ZVE32F-NEXT: .LBB83_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v10
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a3)
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB83_9: # %else10
-; RV64ZVE32F-NEXT: andi a3, a2, 64
+; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB83_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB83_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
-; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: bnez a2, .LBB83_16
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB83_16
; RV64ZVE32F-NEXT: .LBB83_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB83_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a3, v10
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB83_6
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_6
; RV64ZVE32F-NEXT: .LBB83_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v10
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
-; RV64ZVE32F-NEXT: vse32.v v10, (a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB83_7
+; RV64ZVE32F-NEXT: vse32.v v10, (a2)
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_7
; RV64ZVE32F-NEXT: .LBB83_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v11
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB83_8
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: bnez a2, .LBB83_8
; RV64ZVE32F-NEXT: j .LBB83_9
; RV64ZVE32F-NEXT: .LBB83_15: # %cond.store11
-; RV64ZVE32F-NEXT: vmv.x.s a3, v10
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a3)
-; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB83_11
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB83_11
; RV64ZVE32F-NEXT: .LBB83_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: and a1, a2, a1
-; RV64ZVE32F-NEXT: slli a1, a1, 2
+; RV64ZVE32F-NEXT: vmv.x.s a1, v10
+; RV64ZVE32F-NEXT: slli a1, a1, 48
+; RV64ZVE32F-NEXT: srli a1, a1, 46
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -11270,101 +11264,99 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: lui a1, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v0
-; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: addiw a1, a1, -1
-; RV64ZVE32F-NEXT: beqz a3, .LBB96_2
+; RV64ZVE32F-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-NEXT: andi a2, a1, 1
+; RV64ZVE32F-NEXT: beqz a2, .LBB96_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: fsd fa0, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 45
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
; RV64ZVE32F-NEXT: .LBB96_2: # %else
-; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB96_4
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB96_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: fsd fa1, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 45
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
; RV64ZVE32F-NEXT: .LBB96_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a3, a2, 4
+; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB96_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB96_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB96_13
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: bnez a2, .LBB96_13
; RV64ZVE32F-NEXT: .LBB96_6: # %else6
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB96_14
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: bnez a2, .LBB96_14
; RV64ZVE32F-NEXT: .LBB96_7: # %else8
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB96_9
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB96_9
; RV64ZVE32F-NEXT: .LBB96_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: fsd fa5, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 45
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB96_9: # %else10
-; RV64ZVE32F-NEXT: andi a3, a2, 64
+; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB96_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB96_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
-; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: bnez a2, .LBB96_16
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB96_16
; RV64ZVE32F-NEXT: .LBB96_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB96_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: fsd fa2, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB96_6
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 45
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB96_6
; RV64ZVE32F-NEXT: .LBB96_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: fsd fa3, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB96_7
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 45
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: beqz a2, .LBB96_7
; RV64ZVE32F-NEXT: .LBB96_14: # %cond.store7
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: fsd fa4, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB96_8
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 45
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: bnez a2, .LBB96_8
; RV64ZVE32F-NEXT: j .LBB96_9
; RV64ZVE32F-NEXT: .LBB96_15: # %cond.store11
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: fsd fa6, 0(a3)
-; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB96_11
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 45
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB96_11
; RV64ZVE32F-NEXT: .LBB96_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: and a1, a2, a1
-; RV64ZVE32F-NEXT: slli a1, a1, 3
+; RV64ZVE32F-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-NEXT: slli a1, a1, 48
+; RV64ZVE32F-NEXT: srli a1, a1, 45
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
; RV64ZVE32F-NEXT: ret
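For reference, the RV64ZVE32F check updates above replace an explicit 0xffff mask (materialized with lui/addiw and applied with an and) plus a small left shift by a single slli-by-48/srli pair produced by the new combine. Below is a minimal standalone C++ sketch, my own illustration rather than anything from the commit or its tests, that checks the bit identity for the scale-by-8 case seen in the i64/f64 scatters; the v8f32 variant is the same identity with a final srli by 46 instead of 45.

```c++
// Standalone check (illustration only): for a zero-extended i16 index x,
//   (x & 0xffff) << 3            -- old sequence: and + slli 3
// equals
//   (x << 48) >> 45              -- new sequence: slli 48 + srli 45
// on a 64-bit unsigned value with logical right shift.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t tests[] = {0x0, 0x1, 0x7fff, 0xffff, 0x12345678,
                            0xffffffffffffffffULL};
  for (uint64_t x : tests) {
    uint64_t masked  = (x & 0xffff) << 3; // and a0, a0, a4 ; slli a0, a0, 3
    uint64_t shifted = (x << 48) >> 45;   // slli a0, a0, 48 ; srli a0, a0, 45
    assert(masked == shifted);
  }
  return 0;
}
```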