[llvm] 9049cf7 - [RISCV] Add constraint for RVV indexed loads.

Zakk Chen via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 26 07:23:57 PDT 2021


Author: Zakk Chen
Date: 2021-03-26T07:23:24-07:00
New Revision: 9049cf77e39443b479b5c8f56f88b93c698c7876

URL: https://github.com/llvm/llvm-project/commit/9049cf77e39443b479b5c8f56f88b93c698c7876
DIFF: https://github.com/llvm/llvm-project/commit/9049cf77e39443b479b5c8f56f88b93c698c7876.diff

LOG: [RISCV] Add constraint for RVV indexed loads.

Add the constraint when destination EEW not equals the source EEW for
correctness.

The RVV spec has three register overlap rules and I implement the first
stricter constraint because the others are difficult to enforce.

Reviewed By: frasercrmck, craig.topper

Differential Revision: https://reviews.llvm.org/D98920

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
    llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
    llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll
    llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll
    llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll
    llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 583b6393581f..98e14a83b4da 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -670,7 +670,7 @@ class VPseudoSLoadMask<VReg RetClass, bits<7> EEW>:
 }
 
 class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
-                         bit Ordered>:
+                         bit Ordered, bit EarlyClobber>:
       Pseudo<(outs RetClass:$rd),
              (ins GPR:$rs1, IdxClass:$rs2, GPR:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
@@ -683,11 +683,12 @@ class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasDummyMask = 1;
+  let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd", "");
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
 }
 
 class VPseudoILoadMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
-                       bit Ordered>:
+                       bit Ordered, bit EarlyClobber>:
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
               (ins GetVRegNoV0<RetClass>.R:$merge,
                    GPR:$rs1, IdxClass:$rs2,
@@ -698,7 +699,7 @@ class VPseudoILoadMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
   let mayStore = 0;
   let hasSideEffects = 0;
   let usesCustomInserter = 1;
-  let Constraints = "$rd = $merge";
+  let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd, $rd = $merge", "$rd = $merge");
   let Uses = [VL, VTYPE];
   let HasVLOp = 1;
   let HasSEWOp = 1;
@@ -1364,11 +1365,12 @@ multiclass VPseudoILoad<bit Ordered> {
           defvar idx_lmul = !cast<LMULInfo>("V_" # IdxLInfo);
           defvar Vreg = lmul.vrclass;
           defvar IdxVreg = idx_lmul.vrclass;
+          defvar HasConstraint = !ne(sew, eew);
           let VLMul = lmul.value in {
             def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo :
-              VPseudoILoadNoMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered>;
+              VPseudoILoadNoMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered, HasConstraint>;
             def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo # "_MASK" :
-              VPseudoILoadMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered>;
+              VPseudoILoadMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered, HasConstraint>;
           }
         }
       }

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 5cad41fed7fb..2505f97c4405 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -194,13 +194,15 @@ define <4 x i8> @mgather_truemask_v4i8(<4 x i8*> %ptrs, <4 x i8> %passthru) {
 ; RV32-LABEL: mgather_truemask_v4i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli a0, 4, e8,mf4,ta,mu
-; RV32-NEXT:    vloxei32.v v8, (zero), v8
+; RV32-NEXT:    vloxei32.v v25, (zero), v8
+; RV32-NEXT:    vmv1r.v v8, v25
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_truemask_v4i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli a0, 4, e8,mf4,ta,mu
-; RV64-NEXT:    vloxei64.v v8, (zero), v8
+; RV64-NEXT:    vloxei64.v v25, (zero), v8
+; RV64-NEXT:    vmv1r.v v8, v25
 ; RV64-NEXT:    ret
   %mhead = insertelement <4 x i1> undef, i1 1, i32 0
   %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> undef, <4 x i32> zeroinitializer
@@ -413,13 +415,15 @@ define <4 x i16> @mgather_truemask_v4i16(<4 x i16*> %ptrs, <4 x i16> %passthru)
 ; RV32-LABEL: mgather_truemask_v4i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
-; RV32-NEXT:    vloxei32.v v8, (zero), v8
+; RV32-NEXT:    vloxei32.v v25, (zero), v8
+; RV32-NEXT:    vmv1r.v v8, v25
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_truemask_v4i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
-; RV64-NEXT:    vloxei64.v v8, (zero), v8
+; RV64-NEXT:    vloxei64.v v25, (zero), v8
+; RV64-NEXT:    vmv1r.v v8, v25
 ; RV64-NEXT:    ret
   %mhead = insertelement <4 x i1> undef, i1 1, i32 0
   %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> undef, <4 x i32> zeroinitializer
@@ -675,7 +679,8 @@ define <4 x i32> @mgather_truemask_v4i32(<4 x i32*> %ptrs, <4 x i32> %passthru)
 ; RV64-LABEL: mgather_truemask_v4i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
-; RV64-NEXT:    vloxei64.v v8, (zero), v8
+; RV64-NEXT:    vloxei64.v v25, (zero), v8
+; RV64-NEXT:    vmv1r.v v8, v25
 ; RV64-NEXT:    ret
   %mhead = insertelement <4 x i1> undef, i1 1, i32 0
   %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> undef, <4 x i32> zeroinitializer
@@ -959,7 +964,8 @@ define <4 x i64> @mgather_truemask_v4i64(<4 x i64*> %ptrs, <4 x i64> %passthru)
 ; RV32-LABEL: mgather_truemask_v4i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
-; RV32-NEXT:    vloxei32.v v8, (zero), v8
+; RV32-NEXT:    vloxei32.v v26, (zero), v8
+; RV32-NEXT:    vmv2r.v v8, v26
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_truemask_v4i64:
@@ -1324,13 +1330,15 @@ define <4 x half> @mgather_truemask_v4f16(<4 x half*> %ptrs, <4 x half> %passthr
 ; RV32-LABEL: mgather_truemask_v4f16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
-; RV32-NEXT:    vloxei32.v v8, (zero), v8
+; RV32-NEXT:    vloxei32.v v25, (zero), v8
+; RV32-NEXT:    vmv1r.v v8, v25
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_truemask_v4f16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli a0, 4, e16,mf2,ta,mu
-; RV64-NEXT:    vloxei64.v v8, (zero), v8
+; RV64-NEXT:    vloxei64.v v25, (zero), v8
+; RV64-NEXT:    vmv1r.v v8, v25
 ; RV64-NEXT:    ret
   %mhead = insertelement <4 x i1> undef, i1 1, i32 0
   %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> undef, <4 x i32> zeroinitializer
@@ -1544,7 +1552,8 @@ define <4 x float> @mgather_truemask_v4f32(<4 x float*> %ptrs, <4 x float> %pass
 ; RV64-LABEL: mgather_truemask_v4f32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
-; RV64-NEXT:    vloxei64.v v8, (zero), v8
+; RV64-NEXT:    vloxei64.v v25, (zero), v8
+; RV64-NEXT:    vmv1r.v v8, v25
 ; RV64-NEXT:    ret
   %mhead = insertelement <4 x i1> undef, i1 1, i32 0
   %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> undef, <4 x i32> zeroinitializer
@@ -1828,7 +1837,8 @@ define <4 x double> @mgather_truemask_v4f64(<4 x double*> %ptrs, <4 x double> %p
 ; RV32-LABEL: mgather_truemask_v4f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
-; RV32-NEXT:    vloxei32.v v8, (zero), v8
+; RV32-NEXT:    vloxei32.v v26, (zero), v8
+; RV32-NEXT:    vmv2r.v v8, v26
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_truemask_v4f64:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
index d567ff9a0140..72321ead1eed 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
@@ -196,13 +196,15 @@ define <vscale x 4 x i8> @mgather_truemask_nxv4i8(<vscale x 4 x i8*> %ptrs, <vsc
 ; RV32-LABEL: mgather_truemask_nxv4i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
-; RV32-NEXT:    vloxei32.v v8, (zero), v8
+; RV32-NEXT:    vloxei32.v v25, (zero), v8
+; RV32-NEXT:    vmv1r.v v8, v25
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_truemask_nxv4i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
-; RV64-NEXT:    vloxei64.v v8, (zero), v8
+; RV64-NEXT:    vloxei64.v v25, (zero), v8
+; RV64-NEXT:    vmv1r.v v8, v25
 ; RV64-NEXT:    ret
   %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
   %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
@@ -417,13 +419,15 @@ define <vscale x 4 x i16> @mgather_truemask_nxv4i16(<vscale x 4 x i16*> %ptrs, <
 ; RV32-LABEL: mgather_truemask_nxv4i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
-; RV32-NEXT:    vloxei32.v v8, (zero), v8
+; RV32-NEXT:    vloxei32.v v25, (zero), v8
+; RV32-NEXT:    vmv1r.v v8, v25
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_truemask_nxv4i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
-; RV64-NEXT:    vloxei64.v v8, (zero), v8
+; RV64-NEXT:    vloxei64.v v25, (zero), v8
+; RV64-NEXT:    vmv1r.v v8, v25
 ; RV64-NEXT:    ret
   %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
   %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
@@ -681,7 +685,8 @@ define <vscale x 4 x i32> @mgather_truemask_nxv4i32(<vscale x 4 x i32*> %ptrs, <
 ; RV64-LABEL: mgather_truemask_nxv4i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
-; RV64-NEXT:    vloxei64.v v8, (zero), v8
+; RV64-NEXT:    vloxei64.v v26, (zero), v8
+; RV64-NEXT:    vmv2r.v v8, v26
 ; RV64-NEXT:    ret
   %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
   %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
@@ -965,7 +970,8 @@ define <vscale x 4 x i64> @mgather_truemask_nxv4i64(<vscale x 4 x i64*> %ptrs, <
 ; RV32-LABEL: mgather_truemask_nxv4i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
-; RV32-NEXT:    vloxei32.v v8, (zero), v8
+; RV32-NEXT:    vloxei32.v v28, (zero), v8
+; RV32-NEXT:    vmv4r.v v8, v28
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_truemask_nxv4i64:
@@ -1396,13 +1402,15 @@ define <vscale x 4 x half> @mgather_truemask_nxv4f16(<vscale x 4 x half*> %ptrs,
 ; RV32-LABEL: mgather_truemask_nxv4f16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
-; RV32-NEXT:    vloxei32.v v8, (zero), v8
+; RV32-NEXT:    vloxei32.v v25, (zero), v8
+; RV32-NEXT:    vmv1r.v v8, v25
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_truemask_nxv4f16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
-; RV64-NEXT:    vloxei64.v v8, (zero), v8
+; RV64-NEXT:    vloxei64.v v25, (zero), v8
+; RV64-NEXT:    vmv1r.v v8, v25
 ; RV64-NEXT:    ret
   %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
   %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
@@ -1616,7 +1624,8 @@ define <vscale x 4 x float> @mgather_truemask_nxv4f32(<vscale x 4 x float*> %ptr
 ; RV64-LABEL: mgather_truemask_nxv4f32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
-; RV64-NEXT:    vloxei64.v v8, (zero), v8
+; RV64-NEXT:    vloxei64.v v26, (zero), v8
+; RV64-NEXT:    vmv2r.v v8, v26
 ; RV64-NEXT:    ret
   %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
   %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
@@ -1900,7 +1909,8 @@ define <vscale x 4 x double> @mgather_truemask_nxv4f64(<vscale x 4 x double*> %p
 ; RV32-LABEL: mgather_truemask_nxv4f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
-; RV32-NEXT:    vloxei32.v v8, (zero), v8
+; RV32-NEXT:    vloxei32.v v28, (zero), v8
+; RV32-NEXT:    vmv4r.v v8, v28
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mgather_truemask_nxv4f64:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll
index e347c434ff10..cfc54485a741 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll
@@ -10,7 +10,8 @@ define <vscale x 1 x i8> @intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i32(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i32(
@@ -54,7 +55,8 @@ define <vscale x 2 x i8> @intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i32(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i32(
@@ -98,7 +100,8 @@ define <vscale x 4 x i8> @intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf2,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i32(
@@ -142,7 +145,8 @@ define <vscale x 8 x i8> @intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i32(
@@ -186,7 +190,8 @@ define <vscale x 16 x i8> @intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i32(
@@ -230,7 +235,8 @@ define <vscale x 1 x i16> @intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i32(
@@ -274,7 +280,8 @@ define <vscale x 2 x i16> @intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i32(
@@ -318,7 +325,8 @@ define <vscale x 4 x i16> @intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i32(
@@ -362,7 +370,8 @@ define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i32(
@@ -406,7 +415,8 @@ define <vscale x 16 x i16> @intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i32(<vscal
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i32(
@@ -670,7 +680,8 @@ define <vscale x 1 x half> @intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i32(
@@ -714,7 +725,8 @@ define <vscale x 2 x half> @intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i32(
@@ -758,7 +770,8 @@ define <vscale x 4 x half> @intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i32(
@@ -802,7 +815,8 @@ define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i32(
@@ -846,7 +860,8 @@ define <vscale x 16 x half> @intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i32(<vsca
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i32(
@@ -1110,7 +1125,8 @@ define <vscale x 1 x double> @intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i32(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i32(
@@ -1154,7 +1170,8 @@ define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i32(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i32(
@@ -1198,7 +1215,8 @@ define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i32(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i32(
@@ -1242,7 +1260,8 @@ define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i32(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i32(
@@ -1286,7 +1305,8 @@ define <vscale x 1 x i8> @intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i16(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i16(
@@ -1330,7 +1350,8 @@ define <vscale x 2 x i8> @intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i16(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i16(
@@ -1374,7 +1395,8 @@ define <vscale x 4 x i8> @intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i16(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf2,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i16(
@@ -1418,7 +1440,8 @@ define <vscale x 8 x i8> @intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i16(
@@ -1462,7 +1485,8 @@ define <vscale x 16 x i8> @intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i16(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i16(
@@ -1506,7 +1530,8 @@ define <vscale x 32 x i8> @intrinsic_vloxei_v_nxv32i8_nxv32i8_nxv32i16(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv32i8_nxv32i8_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m4,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i16(
@@ -1814,7 +1839,8 @@ define <vscale x 1 x i32> @intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i16(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i16(
@@ -1858,7 +1884,8 @@ define <vscale x 2 x i32> @intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i16(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i16(
@@ -1902,7 +1929,8 @@ define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i16(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16(
@@ -1946,7 +1974,8 @@ define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i16(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i16(
@@ -1990,7 +2019,8 @@ define <vscale x 16 x i32> @intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i16(<vscal
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i16(
@@ -2298,7 +2328,8 @@ define <vscale x 1 x float> @intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i16(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i16(
@@ -2342,7 +2373,8 @@ define <vscale x 2 x float> @intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i16(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i16(
@@ -2386,7 +2418,8 @@ define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i16(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i16(
@@ -2430,7 +2463,8 @@ define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i16(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i16(
@@ -2474,7 +2508,8 @@ define <vscale x 16 x float> @intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i16(<vsc
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i16(
@@ -2518,7 +2553,8 @@ define <vscale x 1 x double> @intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i16(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i16(
@@ -2562,7 +2598,8 @@ define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i16(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i16(
@@ -2606,7 +2643,8 @@ define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i16(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i16(
@@ -2650,7 +2688,8 @@ define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i16(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i16(
@@ -3002,7 +3041,8 @@ define <vscale x 1 x i16> @intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i8(<vscale x 1
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i8(
@@ -3046,7 +3086,8 @@ define <vscale x 2 x i16> @intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i8(<vscale x 2
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i8(
@@ -3090,7 +3131,8 @@ define <vscale x 4 x i16> @intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i8(<vscale x 4
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i8(
@@ -3134,7 +3176,8 @@ define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i8(<vscale x 8
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i8(
@@ -3178,7 +3221,8 @@ define <vscale x 16 x i16> @intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i8(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i8(
@@ -3222,7 +3266,8 @@ define <vscale x 32 x i16> @intrinsic_vloxei_v_nxv32i16_nxv32i16_nxv32i8(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv32i16_nxv32i16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m8,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i8(
@@ -3266,7 +3311,8 @@ define <vscale x 1 x i32> @intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i8(<vscale x 1
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i8(
@@ -3310,7 +3356,8 @@ define <vscale x 2 x i32> @intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i8(<vscale x 2
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i8(
@@ -3354,7 +3401,8 @@ define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i8(<vscale x 4
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i8(
@@ -3398,7 +3446,8 @@ define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i8(<vscale x 8
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i8(
@@ -3442,7 +3491,8 @@ define <vscale x 16 x i32> @intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i8(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i8(
@@ -3486,7 +3536,8 @@ define <vscale x 1 x half> @intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i8(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i8(
@@ -3530,7 +3581,8 @@ define <vscale x 2 x half> @intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i8(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i8(
@@ -3574,7 +3626,8 @@ define <vscale x 4 x half> @intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i8(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i8(
@@ -3618,7 +3671,8 @@ define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i8(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i8(
@@ -3662,7 +3716,8 @@ define <vscale x 16 x half> @intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i8(<vscal
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i8(
@@ -3706,7 +3761,8 @@ define <vscale x 32 x half> @intrinsic_vloxei_v_nxv32f16_nxv32f16_nxv32i8(<vscal
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv32f16_nxv32f16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m8,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i8(
@@ -3750,7 +3806,8 @@ define <vscale x 1 x float> @intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i8(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i8(
@@ -3794,7 +3851,8 @@ define <vscale x 2 x float> @intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i8(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i8(
@@ -3838,7 +3896,8 @@ define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i8(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i8(
@@ -3882,7 +3941,8 @@ define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i8(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i8(
@@ -3926,7 +3986,8 @@ define <vscale x 16 x float> @intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i8(<vsca
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i8(
@@ -3970,7 +4031,8 @@ define <vscale x 1 x double> @intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i8(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i8(
@@ -4014,7 +4076,8 @@ define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i8(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i8(
@@ -4058,7 +4121,8 @@ define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i8(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i8(
@@ -4102,7 +4166,8 @@ define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i8(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i8(

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll
index 3ac5dc3f5746..05204e671f2c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll
@@ -10,7 +10,8 @@ define <vscale x 1 x i8> @intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i64(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i64(
@@ -54,7 +55,8 @@ define <vscale x 2 x i8> @intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i64(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i64(
@@ -98,7 +100,8 @@ define <vscale x 4 x i8> @intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i64(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf2,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i64(
@@ -142,7 +145,8 @@ define <vscale x 8 x i8> @intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i64(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i64(
@@ -186,7 +190,8 @@ define <vscale x 1 x i16> @intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i64(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i64(
@@ -230,7 +235,8 @@ define <vscale x 2 x i16> @intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i64(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i64(
@@ -274,7 +280,8 @@ define <vscale x 4 x i16> @intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i64(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i64(
@@ -318,7 +325,8 @@ define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i64(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i64(
@@ -362,7 +370,8 @@ define <vscale x 1 x i32> @intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i64(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i64(
@@ -406,7 +415,8 @@ define <vscale x 2 x i32> @intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i64(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i64(
@@ -450,7 +460,8 @@ define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i64(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
@@ -494,7 +505,8 @@ define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i64(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i64(
@@ -714,7 +726,8 @@ define <vscale x 1 x half> @intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i64(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i64(
@@ -758,7 +771,8 @@ define <vscale x 2 x half> @intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i64(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i64(
@@ -802,7 +816,8 @@ define <vscale x 4 x half> @intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i64(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i64(
@@ -846,7 +861,8 @@ define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i64(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i64(
@@ -890,7 +906,8 @@ define <vscale x 1 x float> @intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i64(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i64(
@@ -934,7 +951,8 @@ define <vscale x 2 x float> @intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i64(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i64(
@@ -978,7 +996,8 @@ define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i64(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i64(
@@ -1022,7 +1041,8 @@ define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i64(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vloxei64.v v8, (a0), v8
+; CHECK-NEXT:    vloxei64.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i64(
@@ -1242,7 +1262,8 @@ define <vscale x 1 x i8> @intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i32(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i32(
@@ -1286,7 +1307,8 @@ define <vscale x 2 x i8> @intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i32(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i32(
@@ -1330,7 +1352,8 @@ define <vscale x 4 x i8> @intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf2,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i32(
@@ -1374,7 +1397,8 @@ define <vscale x 8 x i8> @intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i32(
@@ -1418,7 +1442,8 @@ define <vscale x 16 x i8> @intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i32(
@@ -1462,7 +1487,8 @@ define <vscale x 1 x i16> @intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i32(
@@ -1506,7 +1532,8 @@ define <vscale x 2 x i16> @intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i32(
@@ -1550,7 +1577,8 @@ define <vscale x 4 x i16> @intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i32(
@@ -1594,7 +1622,8 @@ define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i32(
@@ -1638,7 +1667,8 @@ define <vscale x 16 x i16> @intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i32(<vscal
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i32(
@@ -1902,7 +1932,8 @@ define <vscale x 1 x i64> @intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i32(
@@ -1946,7 +1977,8 @@ define <vscale x 2 x i64> @intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i32(
@@ -1990,7 +2022,8 @@ define <vscale x 4 x i64> @intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i32(
@@ -2034,7 +2067,8 @@ define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i32(
@@ -2078,7 +2112,8 @@ define <vscale x 1 x half> @intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i32(
@@ -2122,7 +2157,8 @@ define <vscale x 2 x half> @intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i32(
@@ -2166,7 +2202,8 @@ define <vscale x 4 x half> @intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i32(
@@ -2210,7 +2247,8 @@ define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i32(
@@ -2254,7 +2292,8 @@ define <vscale x 16 x half> @intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i32(<vsca
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i32(
@@ -2518,7 +2557,8 @@ define <vscale x 1 x double> @intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i32(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i32(
@@ -2562,7 +2602,8 @@ define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i32(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i32(
@@ -2606,7 +2647,8 @@ define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i32(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i32(
@@ -2650,7 +2692,8 @@ define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i32(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
-; CHECK-NEXT:    vloxei32.v v8, (a0), v8
+; CHECK-NEXT:    vloxei32.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i32(
@@ -2694,7 +2737,8 @@ define <vscale x 1 x i8> @intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i16(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i16(
@@ -2738,7 +2782,8 @@ define <vscale x 2 x i8> @intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i16(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i16(
@@ -2782,7 +2827,8 @@ define <vscale x 4 x i8> @intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i16(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf2,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i16(
@@ -2826,7 +2872,8 @@ define <vscale x 8 x i8> @intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i16(
@@ -2870,7 +2917,8 @@ define <vscale x 16 x i8> @intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i16(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i16(
@@ -2914,7 +2962,8 @@ define <vscale x 32 x i8> @intrinsic_vloxei_v_nxv32i8_nxv32i8_nxv32i16(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv32i8_nxv32i8_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m4,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i16(
@@ -3222,7 +3271,8 @@ define <vscale x 1 x i32> @intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i16(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i16(
@@ -3266,7 +3316,8 @@ define <vscale x 2 x i32> @intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i16(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i16(
@@ -3310,7 +3361,8 @@ define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i16(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16(
@@ -3354,7 +3406,8 @@ define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i16(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i16(
@@ -3398,7 +3451,8 @@ define <vscale x 16 x i32> @intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i16(<vscal
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i16(
@@ -3442,7 +3496,8 @@ define <vscale x 1 x i64> @intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i16(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i16(
@@ -3486,7 +3541,8 @@ define <vscale x 2 x i64> @intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i16(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i16(
@@ -3530,7 +3586,8 @@ define <vscale x 4 x i64> @intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i16(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i16(
@@ -3574,7 +3631,8 @@ define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i16(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i16(
@@ -3882,7 +3940,8 @@ define <vscale x 1 x float> @intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i16(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i16(
@@ -3926,7 +3985,8 @@ define <vscale x 2 x float> @intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i16(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i16(
@@ -3970,7 +4030,8 @@ define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i16(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i16(
@@ -4014,7 +4075,8 @@ define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i16(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i16(
@@ -4058,7 +4120,8 @@ define <vscale x 16 x float> @intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i16(<vsc
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i16(
@@ -4102,7 +4165,8 @@ define <vscale x 1 x double> @intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i16(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i16(
@@ -4146,7 +4210,8 @@ define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i16(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i16(
@@ -4190,7 +4255,8 @@ define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i16(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i16(
@@ -4234,7 +4300,8 @@ define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i16(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
-; CHECK-NEXT:    vloxei16.v v8, (a0), v8
+; CHECK-NEXT:    vloxei16.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i16(
@@ -4586,7 +4653,8 @@ define <vscale x 1 x i16> @intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i8(<vscale x 1
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i8(
@@ -4630,7 +4698,8 @@ define <vscale x 2 x i16> @intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i8(<vscale x 2
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i8(
@@ -4674,7 +4743,8 @@ define <vscale x 4 x i16> @intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i8(<vscale x 4
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i8(
@@ -4718,7 +4788,8 @@ define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i8(<vscale x 8
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i8(
@@ -4762,7 +4833,8 @@ define <vscale x 16 x i16> @intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i8(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i8(
@@ -4806,7 +4878,8 @@ define <vscale x 32 x i16> @intrinsic_vloxei_v_nxv32i16_nxv32i16_nxv32i8(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv32i16_nxv32i16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m8,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i8(
@@ -4850,7 +4923,8 @@ define <vscale x 1 x i32> @intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i8(<vscale x 1
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i8(
@@ -4894,7 +4968,8 @@ define <vscale x 2 x i32> @intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i8(<vscale x 2
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i8(
@@ -4938,7 +5013,8 @@ define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i8(<vscale x 4
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i8(
@@ -4982,7 +5058,8 @@ define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i8(<vscale x 8
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i8(
@@ -5026,7 +5103,8 @@ define <vscale x 16 x i32> @intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i8(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i8(
@@ -5070,7 +5148,8 @@ define <vscale x 1 x i64> @intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i8(<vscale x 1
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i8(
@@ -5114,7 +5193,8 @@ define <vscale x 2 x i64> @intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i8(<vscale x 2
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i8(
@@ -5158,7 +5238,8 @@ define <vscale x 4 x i64> @intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i8(<vscale x 4
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i8(
@@ -5202,7 +5283,8 @@ define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i8(<vscale x 8
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i8(
@@ -5246,7 +5328,8 @@ define <vscale x 1 x half> @intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i8(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i8(
@@ -5290,7 +5373,8 @@ define <vscale x 2 x half> @intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i8(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i8(
@@ -5334,7 +5418,8 @@ define <vscale x 4 x half> @intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i8(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i8(
@@ -5378,7 +5463,8 @@ define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i8(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i8(
@@ -5422,7 +5508,8 @@ define <vscale x 16 x half> @intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i8(<vscal
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i8(
@@ -5466,7 +5553,8 @@ define <vscale x 32 x half> @intrinsic_vloxei_v_nxv32f16_nxv32f16_nxv32i8(<vscal
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv32f16_nxv32f16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m8,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i8(
@@ -5510,7 +5598,8 @@ define <vscale x 1 x float> @intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i8(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i8(
@@ -5554,7 +5643,8 @@ define <vscale x 2 x float> @intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i8(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i8(
@@ -5598,7 +5688,8 @@ define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i8(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i8(
@@ -5642,7 +5733,8 @@ define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i8(<vscale x
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i8(
@@ -5686,7 +5778,8 @@ define <vscale x 16 x float> @intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i8(<vsca
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i8(
@@ -5730,7 +5823,8 @@ define <vscale x 1 x double> @intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i8(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i8(
@@ -5774,7 +5868,8 @@ define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i8(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i8(
@@ -5818,7 +5913,8 @@ define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i8(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i8(
@@ -5862,7 +5958,8 @@ define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i8(<vscale
 ; CHECK-LABEL: intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
-; CHECK-NEXT:    vloxei8.v v8, (a0), v8
+; CHECK-NEXT:    vloxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i8(

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll
index 8b218263bb4a..edc4855600b7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll
@@ -10,7 +10,8 @@ define <vscale x 1 x i8> @intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i32(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i32(
@@ -54,7 +55,8 @@ define <vscale x 2 x i8> @intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i32(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i32(
@@ -98,7 +100,8 @@ define <vscale x 4 x i8> @intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf2,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i32(
@@ -142,7 +145,8 @@ define <vscale x 8 x i8> @intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i32(
@@ -186,7 +190,8 @@ define <vscale x 16 x i8> @intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i32(
@@ -230,7 +235,8 @@ define <vscale x 1 x i16> @intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i32(
@@ -274,7 +280,8 @@ define <vscale x 2 x i16> @intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i32(
@@ -318,7 +325,8 @@ define <vscale x 4 x i16> @intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i32(
@@ -362,7 +370,8 @@ define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i32(
@@ -406,7 +415,8 @@ define <vscale x 16 x i16> @intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i32(<vscal
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i32(
@@ -670,7 +680,8 @@ define <vscale x 1 x half> @intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i32(
@@ -714,7 +725,8 @@ define <vscale x 2 x half> @intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i32(
@@ -758,7 +770,8 @@ define <vscale x 4 x half> @intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i32(
@@ -802,7 +815,8 @@ define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i32(
@@ -846,7 +860,8 @@ define <vscale x 16 x half> @intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i32(<vsca
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i32(
@@ -1110,7 +1125,8 @@ define <vscale x 1 x double> @intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i32(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i32(
@@ -1154,7 +1170,8 @@ define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i32(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i32(
@@ -1198,7 +1215,8 @@ define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i32(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i32(
@@ -1242,7 +1260,8 @@ define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i32(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i32(
@@ -1286,7 +1305,8 @@ define <vscale x 1 x i8> @intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i16(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i16(
@@ -1330,7 +1350,8 @@ define <vscale x 2 x i8> @intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i16(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i16(
@@ -1374,7 +1395,8 @@ define <vscale x 4 x i8> @intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i16(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf2,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i16(
@@ -1418,7 +1440,8 @@ define <vscale x 8 x i8> @intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i16(
@@ -1462,7 +1485,8 @@ define <vscale x 16 x i8> @intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i16(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i16(
@@ -1506,7 +1530,8 @@ define <vscale x 32 x i8> @intrinsic_vluxei_v_nxv32i8_nxv32i8_nxv32i16(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv32i8_nxv32i8_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m4,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i16(
@@ -1814,7 +1839,8 @@ define <vscale x 1 x i32> @intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i16(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i16(
@@ -1858,7 +1884,8 @@ define <vscale x 2 x i32> @intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i16(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i16(
@@ -1902,7 +1929,8 @@ define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i16(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i16(
@@ -1946,7 +1974,8 @@ define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i16(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i16(
@@ -1990,7 +2019,8 @@ define <vscale x 16 x i32> @intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i16(<vscal
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i16(
@@ -2298,7 +2328,8 @@ define <vscale x 1 x float> @intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i16(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i16(
@@ -2342,7 +2373,8 @@ define <vscale x 2 x float> @intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i16(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i16(
@@ -2386,7 +2418,8 @@ define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i16(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i16(
@@ -2430,7 +2463,8 @@ define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i16(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i16(
@@ -2474,7 +2508,8 @@ define <vscale x 16 x float> @intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i16(<vsc
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i16(
@@ -2518,7 +2553,8 @@ define <vscale x 1 x double> @intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i16(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i16(
@@ -2562,7 +2598,8 @@ define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i16(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i16(
@@ -2606,7 +2643,8 @@ define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i16(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i16(
@@ -2650,7 +2688,8 @@ define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i16(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i16(
@@ -3002,7 +3041,8 @@ define <vscale x 1 x i16> @intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i8(<vscale x 1
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i8(
@@ -3046,7 +3086,8 @@ define <vscale x 2 x i16> @intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i8(<vscale x 2
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i8(
@@ -3090,7 +3131,8 @@ define <vscale x 4 x i16> @intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i8(<vscale x 4
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i8(
@@ -3134,7 +3176,8 @@ define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i8(<vscale x 8
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i8(
@@ -3178,7 +3221,8 @@ define <vscale x 16 x i16> @intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i8(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i8(
@@ -3222,7 +3266,8 @@ define <vscale x 32 x i16> @intrinsic_vluxei_v_nxv32i16_nxv32i16_nxv32i8(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv32i16_nxv32i16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m8,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i8(
@@ -3266,7 +3311,8 @@ define <vscale x 1 x i32> @intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i8(<vscale x 1
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i8(
@@ -3310,7 +3356,8 @@ define <vscale x 2 x i32> @intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i8(<vscale x 2
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i8(
@@ -3354,7 +3401,8 @@ define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i8(<vscale x 4
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i8(
@@ -3398,7 +3446,8 @@ define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i8(<vscale x 8
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i8(
@@ -3442,7 +3491,8 @@ define <vscale x 16 x i32> @intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i8(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i8(
@@ -3486,7 +3536,8 @@ define <vscale x 1 x half> @intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i8(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i8(
@@ -3530,7 +3581,8 @@ define <vscale x 2 x half> @intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i8(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i8(
@@ -3574,7 +3626,8 @@ define <vscale x 4 x half> @intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i8(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i8(
@@ -3618,7 +3671,8 @@ define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i8(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i8(
@@ -3662,7 +3716,8 @@ define <vscale x 16 x half> @intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i8(<vscal
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i8(
@@ -3706,7 +3761,8 @@ define <vscale x 32 x half> @intrinsic_vluxei_v_nxv32f16_nxv32f16_nxv32i8(<vscal
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv32f16_nxv32f16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m8,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vluxei.nxv32f16.nxv32i8(
@@ -3750,7 +3806,8 @@ define <vscale x 1 x float> @intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i8(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i8(
@@ -3794,7 +3851,8 @@ define <vscale x 2 x float> @intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i8(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i8(
@@ -3838,7 +3896,8 @@ define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i8(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i8(
@@ -3882,7 +3941,8 @@ define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i8(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i8(
@@ -3926,7 +3986,8 @@ define <vscale x 16 x float> @intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i8(<vsca
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i8(
@@ -3970,7 +4031,8 @@ define <vscale x 1 x double> @intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i8(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i8(
@@ -4014,7 +4076,8 @@ define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i8(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i8(
@@ -4058,7 +4121,8 @@ define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i8(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i8(
@@ -4102,7 +4166,8 @@ define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i8(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i8(

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll
index cef3916834ac..77136f2ef1ac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll
@@ -10,7 +10,8 @@ define <vscale x 1 x i8> @intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i64(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i64(
@@ -54,7 +55,8 @@ define <vscale x 2 x i8> @intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i64(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i64(
@@ -98,7 +100,8 @@ define <vscale x 4 x i8> @intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i64(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf2,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i64(
@@ -142,7 +145,8 @@ define <vscale x 8 x i8> @intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i64(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i64(
@@ -186,7 +190,8 @@ define <vscale x 1 x i16> @intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i64(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i64(
@@ -230,7 +235,8 @@ define <vscale x 2 x i16> @intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i64(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i64(
@@ -274,7 +280,8 @@ define <vscale x 4 x i16> @intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i64(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i64(
@@ -318,7 +325,8 @@ define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i64(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i64(
@@ -362,7 +370,8 @@ define <vscale x 1 x i32> @intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i64(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i64(
@@ -406,7 +415,8 @@ define <vscale x 2 x i32> @intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i64(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(
@@ -450,7 +460,8 @@ define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i64(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64(
@@ -494,7 +505,8 @@ define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i64(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i64(
@@ -714,7 +726,8 @@ define <vscale x 1 x half> @intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i64(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i64(
@@ -758,7 +771,8 @@ define <vscale x 2 x half> @intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i64(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i64(
@@ -802,7 +816,8 @@ define <vscale x 4 x half> @intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i64(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i64(
@@ -846,7 +861,8 @@ define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i64(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i64(
@@ -890,7 +906,8 @@ define <vscale x 1 x float> @intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i64(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i64(
@@ -934,7 +951,8 @@ define <vscale x 2 x float> @intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i64(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i64(
@@ -978,7 +996,8 @@ define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i64(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i64(
@@ -1022,7 +1041,8 @@ define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i64(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vluxei64.v v8, (a0), v8
+; CHECK-NEXT:    vluxei64.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i64(
@@ -1242,7 +1262,8 @@ define <vscale x 1 x i8> @intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i32(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i32(
@@ -1286,7 +1307,8 @@ define <vscale x 2 x i8> @intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i32(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i32(
@@ -1330,7 +1352,8 @@ define <vscale x 4 x i8> @intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i32(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf2,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i32(
@@ -1374,7 +1397,8 @@ define <vscale x 8 x i8> @intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i32(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i32(
@@ -1418,7 +1442,8 @@ define <vscale x 16 x i8> @intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i32(
@@ -1462,7 +1487,8 @@ define <vscale x 1 x i16> @intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i32(
@@ -1506,7 +1532,8 @@ define <vscale x 2 x i16> @intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i32(
@@ -1550,7 +1577,8 @@ define <vscale x 4 x i16> @intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i32(
@@ -1594,7 +1622,8 @@ define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i32(
@@ -1638,7 +1667,8 @@ define <vscale x 16 x i16> @intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i32(<vscal
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i32(
@@ -1902,7 +1932,8 @@ define <vscale x 1 x i64> @intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i32(
@@ -1946,7 +1977,8 @@ define <vscale x 2 x i64> @intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i32(
@@ -1990,7 +2022,8 @@ define <vscale x 4 x i64> @intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i32(
@@ -2034,7 +2067,8 @@ define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i32(
@@ -2078,7 +2112,8 @@ define <vscale x 1 x half> @intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i32(
@@ -2122,7 +2157,8 @@ define <vscale x 2 x half> @intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i32(
@@ -2166,7 +2202,8 @@ define <vscale x 4 x half> @intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i32(
@@ -2210,7 +2247,8 @@ define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i32(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i32(
@@ -2254,7 +2292,8 @@ define <vscale x 16 x half> @intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i32(<vsca
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i32(
@@ -2518,7 +2557,8 @@ define <vscale x 1 x double> @intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i32(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i32(
@@ -2562,7 +2602,8 @@ define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i32(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i32(
@@ -2606,7 +2647,8 @@ define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i32(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i32(
@@ -2650,7 +2692,8 @@ define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i32(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
-; CHECK-NEXT:    vluxei32.v v8, (a0), v8
+; CHECK-NEXT:    vluxei32.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i32(
@@ -2694,7 +2737,8 @@ define <vscale x 1 x i8> @intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i16(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i16(
@@ -2738,7 +2782,8 @@ define <vscale x 2 x i8> @intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i16(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i16(
@@ -2782,7 +2827,8 @@ define <vscale x 4 x i8> @intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i16(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,mf2,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i16(
@@ -2826,7 +2872,8 @@ define <vscale x 8 x i8> @intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i16(<vscale x 8 x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i16(
@@ -2870,7 +2917,8 @@ define <vscale x 16 x i8> @intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i16(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i16(
@@ -2914,7 +2962,8 @@ define <vscale x 32 x i8> @intrinsic_vluxei_v_nxv32i8_nxv32i8_nxv32i16(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv32i8_nxv32i8_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e8,m4,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i16(
@@ -3222,7 +3271,8 @@ define <vscale x 1 x i32> @intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i16(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i16(
@@ -3266,7 +3316,8 @@ define <vscale x 2 x i32> @intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i16(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i16(
@@ -3310,7 +3361,8 @@ define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i16(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i16(
@@ -3354,7 +3406,8 @@ define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i16(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i16(
@@ -3398,7 +3451,8 @@ define <vscale x 16 x i32> @intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i16(<vscal
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i16(
@@ -3442,7 +3496,8 @@ define <vscale x 1 x i64> @intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i16(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i16(
@@ -3486,7 +3541,8 @@ define <vscale x 2 x i64> @intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i16(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i16(
@@ -3530,7 +3586,8 @@ define <vscale x 4 x i64> @intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i16(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i16(
@@ -3574,7 +3631,8 @@ define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i16(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i16(
@@ -3882,7 +3940,8 @@ define <vscale x 1 x float> @intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i16(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i16(
@@ -3926,7 +3985,8 @@ define <vscale x 2 x float> @intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i16(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i16(
@@ -3970,7 +4030,8 @@ define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i16(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i16(
@@ -4014,7 +4075,8 @@ define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i16(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i16(
@@ -4058,7 +4120,8 @@ define <vscale x 16 x float> @intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i16(<vsc
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i16(
@@ -4102,7 +4165,8 @@ define <vscale x 1 x double> @intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i16(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i16(
@@ -4146,7 +4210,8 @@ define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i16(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i16(
@@ -4190,7 +4255,8 @@ define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i16(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i16(
@@ -4234,7 +4300,8 @@ define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i16(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
-; CHECK-NEXT:    vluxei16.v v8, (a0), v8
+; CHECK-NEXT:    vluxei16.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i16(
@@ -4586,7 +4653,8 @@ define <vscale x 1 x i16> @intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i8(<vscale x 1
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i8(
@@ -4630,7 +4698,8 @@ define <vscale x 2 x i16> @intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i8(<vscale x 2
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i8(
@@ -4674,7 +4743,8 @@ define <vscale x 4 x i16> @intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i8(<vscale x 4
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i8(
@@ -4718,7 +4788,8 @@ define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i8(<vscale x 8
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i8(
@@ -4762,7 +4833,8 @@ define <vscale x 16 x i16> @intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i8(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i8(
@@ -4806,7 +4878,8 @@ define <vscale x 32 x i16> @intrinsic_vluxei_v_nxv32i16_nxv32i16_nxv32i8(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv32i16_nxv32i16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m8,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i8(
@@ -4850,7 +4923,8 @@ define <vscale x 1 x i32> @intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i8(<vscale x 1
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i8(
@@ -4894,7 +4968,8 @@ define <vscale x 2 x i32> @intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i8(<vscale x 2
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i8(
@@ -4938,7 +5013,8 @@ define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i8(<vscale x 4
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i8(
@@ -4982,7 +5058,8 @@ define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i8(<vscale x 8
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i8(
@@ -5026,7 +5103,8 @@ define <vscale x 16 x i32> @intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i8(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i8(
@@ -5070,7 +5148,8 @@ define <vscale x 1 x i64> @intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i8(<vscale x 1
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i8(
@@ -5114,7 +5193,8 @@ define <vscale x 2 x i64> @intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i8(<vscale x 2
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i8(
@@ -5158,7 +5238,8 @@ define <vscale x 4 x i64> @intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i8(<vscale x 4
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i8(
@@ -5202,7 +5283,8 @@ define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i8(<vscale x 8
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i8(
@@ -5246,7 +5328,8 @@ define <vscale x 1 x half> @intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i8(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i8(
@@ -5290,7 +5373,8 @@ define <vscale x 2 x half> @intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i8(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i8(
@@ -5334,7 +5418,8 @@ define <vscale x 4 x half> @intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i8(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i8(
@@ -5378,7 +5463,8 @@ define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i8(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m2,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i8(
@@ -5422,7 +5508,8 @@ define <vscale x 16 x half> @intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i8(<vscal
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m4,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i8(
@@ -5466,7 +5553,8 @@ define <vscale x 32 x half> @intrinsic_vluxei_v_nxv32f16_nxv32f16_nxv32i8(<vscal
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv32f16_nxv32f16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e16,m8,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vluxei.nxv32f16.nxv32i8(
@@ -5510,7 +5598,8 @@ define <vscale x 1 x float> @intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i8(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i8(
@@ -5554,7 +5643,8 @@ define <vscale x 2 x float> @intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i8(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m1,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i8(
@@ -5598,7 +5688,8 @@ define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i8(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m2,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i8(
@@ -5642,7 +5733,8 @@ define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i8(<vscale x
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m4,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i8(
@@ -5686,7 +5778,8 @@ define <vscale x 16 x float> @intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i8(<vsca
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e32,m8,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i8(
@@ -5730,7 +5823,8 @@ define <vscale x 1 x double> @intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i8(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m1,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v25, (a0), v8
+; CHECK-NEXT:    vmv1r.v v8, v25
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i8(
@@ -5774,7 +5868,8 @@ define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i8(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m2,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v26, (a0), v8
+; CHECK-NEXT:    vmv2r.v v8, v26
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i8(
@@ -5818,7 +5913,8 @@ define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i8(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m4,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v28, (a0), v8
+; CHECK-NEXT:    vmv4r.v v8, v28
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i8(
@@ -5862,7 +5958,8 @@ define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i8(<vscale
 ; CHECK-LABEL: intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
-; CHECK-NEXT:    vluxei8.v v8, (a0), v8
+; CHECK-NEXT:    vluxei8.v v16, (a0), v8
+; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i8(


        


More information about the llvm-commits mailing list