[llvm] 896f9bc - [RISCV] Remove earlyclobber from vnsrl/vnsra/vnclip(u) when the source and dest are a single vector register.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 1 09:21:26 PDT 2021


Author: Craig Topper
Date: 2021-06-01T09:17:52-07:00
New Revision: 896f9bc350eba0baf17f1ceae7383d88f0ce2a85

URL: https://github.com/llvm/llvm-project/commit/896f9bc350eba0baf17f1ceae7383d88f0ce2a85
DIFF: https://github.com/llvm/llvm-project/commit/896f9bc350eba0baf17f1ceae7383d88f0ce2a85.diff

LOG: [RISCV] Remove earlyclobber from vnsrl/vnsra/vnclip(u) when the source and dest are a single vector register.

This guarantees that they meet the following overlap exception from the spec:

"The destination EEW is smaller than the source EEW and the overlap
is in the lowest-numbered part of the source register group"

Being a single register guarantees the overlap is always in the
lowest-numbered part of the group.
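
Concretely (register and vtype choices below are illustrative, not taken
from the patch): with a single-register source, any overlap is necessarily
with the lowest-numbered register, while for a grouped source the register
allocator could pick a destination that overlaps the upper part of the
group, so the constraint has to stay in that case:

  # Destination e16,mf2; the e32 source is read at LMUL=1, i.e. a single
  # register, so reusing it as the destination is always legal.
  vsetivli zero, 4, e16,mf2,ta,mu
  vnsrl.wi v8, v8, 0    # OK: overlap is in the low part of v8

  # Destination e16,m1; the e32 source is read at LMUL=2, i.e. the group
  # {v8,v9}. v9 as the destination would overlap the upper part of the
  # group, which the spec reserves, hence @earlyclobber is kept there.
  vsetivli zero, 8, e16,m1,ta,mu
  vnsrl.wi v9, v8, 0    # reserved encoding; earlyclobber prevents this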

Reviewed By: frasercrmck, khchen

Differential Revision: https://reviews.llvm.org/D103351

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
    llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll
    llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
    llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
    llvm/test/CodeGen/RISCV/rvv/vnclip-rv32.ll
    llvm/test/CodeGen/RISCV/rvv/vnclip-rv64.ll
    llvm/test/CodeGen/RISCV/rvv/vnclipu-rv32.ll
    llvm/test/CodeGen/RISCV/rvv/vnclipu-rv64.ll
    llvm/test/CodeGen/RISCV/rvv/vnsra-rv32.ll
    llvm/test/CodeGen/RISCV/rvv/vnsra-rv64.ll
    llvm/test/CodeGen/RISCV/rvv/vnsrl-rv32.ll
    llvm/test/CodeGen/RISCV/rvv/vnsrl-rv64.ll
    llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode-rv32.ll
    llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode-rv64.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index c672113f739a..7715c2e91ef0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -1571,22 +1571,27 @@ multiclass VPseudoBinaryW_WF {
                                        f.fprclass, m>;
 }
 
+// Narrowing instructions like vnsrl/vnsra/vnclip(u) don't need @earlyclobber
+// if the source and destination have an LMUL<=1. This matches the following
+// overlap exception from the spec:
+// "The destination EEW is smaller than the source EEW and the overlap is in the
+//  lowest-numbered part of the source register group."
 multiclass VPseudoBinaryV_WV {
   foreach m = MxListW.m in
     defm _WV : VPseudoBinary<m.vrclass, m.wvrclass, m.vrclass, m,
-                             "@earlyclobber $rd">;
+                             !if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>;
 }
 
 multiclass VPseudoBinaryV_WX {
   foreach m = MxListW.m in
     defm _WX : VPseudoBinary<m.vrclass, m.wvrclass, GPR, m,
-                             "@earlyclobber $rd">;
+                             !if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>;
 }
 
 multiclass VPseudoBinaryV_WI {
   foreach m = MxListW.m in
     defm _WI : VPseudoBinary<m.vrclass, m.wvrclass, uimm5, m,
-                             "@earlyclobber $rd">;
+                             !if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>;
 }
 
 // For vadc and vsbc, the instruction encoding is reserved if the destination

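A note on the condition above: m.octuple encodes the destination LMUL in
eighths (MF8=1, MF4=2, MF2=4, M1=8, M2=16, M4=32, per the LMULInfo
definitions in this file), and the wide source operand is always twice the
destination LMUL, so across MxListW the !if resolves as:

  dest LMUL  octuple  source LMUL  constraint
  MF8        1        MF4          (none)
  MF4        2        MF2          (none)
  MF2        4        M1           (none)
  M1         8        M2           @earlyclobber $rd
  M2         16       M4           @earlyclobber $rd
  M4         32       M8           @earlyclobber $rd

Every row without the constraint is one where the wide source fits in a
single vector register, which is exactly the case the commit message
describes.
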
diff --git a/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll
index ee4006a5915b..fcd9688329ae 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll
@@ -708,8 +708,8 @@ define void @truncstore_nxv1i32_nxv1i8(<vscale x 1 x i32> %x, <vscale x 1 x i8>*
 ; CHECK-NEXT:    vsetvli a1, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vnsrl.wi v25, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
-; CHECK-NEXT:    vse8.v v26, (a0)
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
+; CHECK-NEXT:    vse8.v v25, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
   store <vscale x 1 x i8> %y, <vscale x 1 x i8>* %z
@@ -760,8 +760,8 @@ define void @truncstore_nxv2i32_nxv2i8(<vscale x 2 x i32> %x, <vscale x 2 x i8>*
 ; CHECK-NEXT:    vsetvli a1, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vnsrl.wi v25, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
-; CHECK-NEXT:    vse8.v v26, (a0)
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
+; CHECK-NEXT:    vse8.v v25, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 2 x i32> %x to <vscale x 2 x i8>
   store <vscale x 2 x i8> %y, <vscale x 2 x i8>* %z
@@ -810,8 +810,8 @@ define void @truncstore_nxv4i32_nxv4i8(<vscale x 4 x i32> %x, <vscale x 4 x i8>*
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vnsrl.wi v25, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
-; CHECK-NEXT:    vse8.v v26, (a0)
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
+; CHECK-NEXT:    vse8.v v25, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 4 x i32> %x to <vscale x 4 x i8>
   store <vscale x 4 x i8> %y, <vscale x 4 x i8>* %z
@@ -936,9 +936,9 @@ define void @truncstore_nxv1i64_nxv1i8(<vscale x 1 x i64> %x, <vscale x 1 x i8>*
 ; CHECK-NEXT:    vsetvli a1, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vnsrl.wi v25, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v26, 0
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
 ; CHECK-NEXT:    vse8.v v25, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
@@ -952,8 +952,8 @@ define void @truncstore_nxv1i64_nxv1i16(<vscale x 1 x i64> %x, <vscale x 1 x i16
 ; CHECK-NEXT:    vsetvli a1, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vnsrl.wi v25, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
-; CHECK-NEXT:    vse16.v v26, (a0)
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
+; CHECK-NEXT:    vse16.v v25, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 1 x i64> %x to <vscale x 1 x i16>
   store <vscale x 1 x i16> %y, <vscale x 1 x i16>* %z
@@ -978,9 +978,9 @@ define void @truncstore_nxv2i64_nxv2i8(<vscale x 2 x i64> %x, <vscale x 2 x i8>*
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vnsrl.wi v25, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v26, 0
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
 ; CHECK-NEXT:    vse8.v v25, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 2 x i64> %x to <vscale x 2 x i8>
@@ -994,8 +994,8 @@ define void @truncstore_nxv2i64_nxv2i16(<vscale x 2 x i64> %x, <vscale x 2 x i16
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vnsrl.wi v25, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
-; CHECK-NEXT:    vse16.v v26, (a0)
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
+; CHECK-NEXT:    vse16.v v25, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 2 x i64> %x to <vscale x 2 x i16>
   store <vscale x 2 x i16> %y, <vscale x 2 x i16>* %z
@@ -1022,8 +1022,8 @@ define void @truncstore_nxv4i64_nxv4i8(<vscale x 4 x i64> %x, <vscale x 4 x i8>*
 ; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vnsrl.wi v25, v26, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
-; CHECK-NEXT:    vse8.v v26, (a0)
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
+; CHECK-NEXT:    vse8.v v25, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 4 x i64> %x to <vscale x 4 x i8>
   store <vscale x 4 x i8> %y, <vscale x 4 x i8>* %z

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
index 6464e12e2334..f829d2dc4d64 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
@@ -1039,8 +1039,8 @@ define void @truncstore_v2i32_v2i8(<2 x i32> %x, <2 x i8>* %z) {
 ; CHECK-NEXT:    vsetivli zero, 2, e16,mf4,ta,mu
 ; CHECK-NEXT:    vnsrl.wi v25, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
-; CHECK-NEXT:    vse8.v v26, (a0)
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
+; CHECK-NEXT:    vse8.v v25, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <2 x i32> %x to <2 x i8>
   store <2 x i8> %y, <2 x i8>* %z
@@ -1091,8 +1091,8 @@ define void @truncstore_v4i32_v4i8(<4 x i32> %x, <4 x i8>* %z) {
 ; CHECK-NEXT:    vsetivli zero, 4, e16,mf2,ta,mu
 ; CHECK-NEXT:    vnsrl.wi v25, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
-; CHECK-NEXT:    vse8.v v26, (a0)
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
+; CHECK-NEXT:    vse8.v v25, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <4 x i32> %x to <4 x i8>
   store <4 x i8> %y, <4 x i8>* %z
@@ -1165,19 +1165,19 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %z) {
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v25, v8, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v26, v25, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8,mf2,ta,mu
-; LMULMAX1-NEXT:    vmv.v.i v25, 0
+; LMULMAX1-NEXT:    vmv.v.i v26, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v25, v26, 0
+; LMULMAX1-NEXT:    vslideup.vi v26, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16,mf2,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v26, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v9, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v27, v26, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v25, v27, 4
+; LMULMAX1-NEXT:    vslideup.vi v26, v25, 4
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf2,ta,mu
-; LMULMAX1-NEXT:    vse8.v v25, (a0)
+; LMULMAX1-NEXT:    vse8.v v26, (a0)
 ; LMULMAX1-NEXT:    ret
 ;
 ; LMULMAX4-LABEL: truncstore_v8i32_v8i8:
@@ -1185,8 +1185,8 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %z) {
 ; LMULMAX4-NEXT:    vsetivli zero, 8, e16,m1,ta,mu
 ; LMULMAX4-NEXT:    vnsrl.wi v25, v8, 0
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e8,mf2,ta,mu
-; LMULMAX4-NEXT:    vnsrl.wi v26, v25, 0
-; LMULMAX4-NEXT:    vse8.v v26, (a0)
+; LMULMAX4-NEXT:    vnsrl.wi v25, v25, 0
+; LMULMAX4-NEXT:    vse8.v v25, (a0)
 ; LMULMAX4-NEXT:    ret
   %y = trunc <8 x i32> %x to <8 x i8>
   store <8 x i8> %y, <8 x i8>* %z
@@ -1289,38 +1289,38 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %z) {
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v25, v8, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v26, v25, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8,mf2,ta,mu
-; LMULMAX1-NEXT:    vmv.v.i v25, 0
+; LMULMAX1-NEXT:    vmv.v.i v26, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf2,tu,mu
-; LMULMAX1-NEXT:    vmv1r.v v27, v25
-; LMULMAX1-NEXT:    vslideup.vi v27, v26, 0
+; LMULMAX1-NEXT:    vmv1r.v v27, v26
+; LMULMAX1-NEXT:    vslideup.vi v27, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16,mf2,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v26, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v9, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v28, v26, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v27, v28, 4
+; LMULMAX1-NEXT:    vslideup.vi v27, v25, 4
 ; LMULMAX1-NEXT:    vsetivli zero, 16, e8,m1,ta,mu
-; LMULMAX1-NEXT:    vmv.v.i v26, 0
+; LMULMAX1-NEXT:    vmv.v.i v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8,m1,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v26, v27, 0
+; LMULMAX1-NEXT:    vslideup.vi v25, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v27, v10, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v28, v27, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v25, v28, 0
+; LMULMAX1-NEXT:    vslideup.vi v26, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v27, v11, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v28, v27, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v25, v28, 4
+; LMULMAX1-NEXT:    vslideup.vi v26, v27, 4
 ; LMULMAX1-NEXT:    vsetivli zero, 16, e8,m1,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v26, v25, 8
+; LMULMAX1-NEXT:    vslideup.vi v25, v26, 8
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
-; LMULMAX1-NEXT:    vse8.v v26, (a0)
+; LMULMAX1-NEXT:    vse8.v v25, (a0)
 ; LMULMAX1-NEXT:    ret
 ;
 ; LMULMAX4-LABEL: truncstore_v16i32_v16i8:
@@ -1477,9 +1477,9 @@ define void @truncstore_v2i64_v2i8(<2 x i64> %x, <2 x i8>* %z) {
 ; CHECK-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; CHECK-NEXT:    vnsrl.wi v25, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v26, 0
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
 ; CHECK-NEXT:    vse8.v v25, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <2 x i64> %x to <2 x i8>
@@ -1493,8 +1493,8 @@ define void @truncstore_v2i64_v2i16(<2 x i64> %x, <2 x i16>* %z) {
 ; CHECK-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; CHECK-NEXT:    vnsrl.wi v25, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
-; CHECK-NEXT:    vse16.v v26, (a0)
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
+; CHECK-NEXT:    vse16.v v25, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <2 x i64> %x to <2 x i16>
   store <2 x i16> %y, <2 x i16>* %z
@@ -1519,9 +1519,9 @@ define void @truncstore_v4i64_v4i8(<4 x i64> %x, <4 x i8>* %z) {
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v25, v8, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v26, v25, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v25, v26, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf4,ta,mu
 ; LMULMAX1-NEXT:    vmv.v.i v26, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e8,mf4,tu,mu
@@ -1529,9 +1529,9 @@ define void @truncstore_v4i64_v4i8(<4 x i64> %x, <4 x i8>* %z) {
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v25, v9, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v27, v25, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v25, v27, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf4,tu,mu
 ; LMULMAX1-NEXT:    vslideup.vi v26, v25, 2
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
@@ -1543,9 +1543,9 @@ define void @truncstore_v4i64_v4i8(<4 x i64> %x, <4 x i8>* %z) {
 ; LMULMAX4-NEXT:    vsetivli zero, 4, e32,m1,ta,mu
 ; LMULMAX4-NEXT:    vnsrl.wi v25, v8, 0
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
-; LMULMAX4-NEXT:    vnsrl.wi v26, v25, 0
+; LMULMAX4-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; LMULMAX4-NEXT:    vnsrl.wi v25, v26, 0
+; LMULMAX4-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX4-NEXT:    vse8.v v25, (a0)
 ; LMULMAX4-NEXT:    ret
   %y = trunc <4 x i64> %x to <4 x i8>
@@ -1559,19 +1559,19 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, <4 x i16>* %z) {
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v25, v8, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v26, v25, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16,mf2,ta,mu
-; LMULMAX1-NEXT:    vmv.v.i v25, 0
+; LMULMAX1-NEXT:    vmv.v.i v26, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e16,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v25, v26, 0
+; LMULMAX1-NEXT:    vslideup.vi v26, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v26, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v9, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v27, v26, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v25, v27, 2
+; LMULMAX1-NEXT:    vslideup.vi v26, v25, 2
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
-; LMULMAX1-NEXT:    vse16.v v25, (a0)
+; LMULMAX1-NEXT:    vse16.v v26, (a0)
 ; LMULMAX1-NEXT:    ret
 ;
 ; LMULMAX4-LABEL: truncstore_v4i64_v4i16:
@@ -1579,8 +1579,8 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, <4 x i16>* %z) {
 ; LMULMAX4-NEXT:    vsetivli zero, 4, e32,m1,ta,mu
 ; LMULMAX4-NEXT:    vnsrl.wi v25, v8, 0
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
-; LMULMAX4-NEXT:    vnsrl.wi v26, v25, 0
-; LMULMAX4-NEXT:    vse16.v v26, (a0)
+; LMULMAX4-NEXT:    vnsrl.wi v25, v25, 0
+; LMULMAX4-NEXT:    vse16.v v25, (a0)
 ; LMULMAX4-NEXT:    ret
   %y = trunc <4 x i64> %x to <4 x i16>
   store <4 x i16> %y, <4 x i16>* %z
@@ -1621,9 +1621,9 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %z) {
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v25, v8, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v26, v25, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v25, v26, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf4,ta,mu
 ; LMULMAX1-NEXT:    vmv.v.i v26, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e8,mf4,tu,mu
@@ -1632,9 +1632,9 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %z) {
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v25, v9, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v28, v25, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v25, v28, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf4,tu,mu
 ; LMULMAX1-NEXT:    vslideup.vi v27, v25, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8,mf2,ta,mu
@@ -1644,17 +1644,17 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %z) {
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v27, v10, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v28, v27, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v27, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v27, v28, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e8,mf4,tu,mu
 ; LMULMAX1-NEXT:    vslideup.vi v26, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v27, v11, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v28, v27, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v27, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v27, v28, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf4,tu,mu
 ; LMULMAX1-NEXT:    vslideup.vi v26, v27, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8,mf2,tu,mu
@@ -1670,8 +1670,8 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %z) {
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; LMULMAX4-NEXT:    vnsrl.wi v25, v26, 0
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e8,mf2,ta,mu
-; LMULMAX4-NEXT:    vnsrl.wi v26, v25, 0
-; LMULMAX4-NEXT:    vse8.v v26, (a0)
+; LMULMAX4-NEXT:    vnsrl.wi v25, v25, 0
+; LMULMAX4-NEXT:    vse8.v v25, (a0)
 ; LMULMAX4-NEXT:    ret
   %y = trunc <8 x i64> %x to <8 x i8>
   store <8 x i8> %y, <8 x i8>* %z
@@ -1684,38 +1684,38 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %z) {
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v25, v8, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v26, v25, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16,mf2,ta,mu
-; LMULMAX1-NEXT:    vmv.v.i v25, 0
+; LMULMAX1-NEXT:    vmv.v.i v26, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e16,mf2,tu,mu
-; LMULMAX1-NEXT:    vmv1r.v v27, v25
-; LMULMAX1-NEXT:    vslideup.vi v27, v26, 0
+; LMULMAX1-NEXT:    vmv1r.v v27, v26
+; LMULMAX1-NEXT:    vslideup.vi v27, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v26, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v9, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v28, v26, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v27, v28, 2
+; LMULMAX1-NEXT:    vslideup.vi v27, v25, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e16,m1,ta,mu
-; LMULMAX1-NEXT:    vmv.v.i v26, 0
+; LMULMAX1-NEXT:    vmv.v.i v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16,m1,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v26, v27, 0
+; LMULMAX1-NEXT:    vslideup.vi v25, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v27, v10, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v28, v27, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e16,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v25, v28, 0
+; LMULMAX1-NEXT:    vslideup.vi v26, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v27, v11, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v28, v27, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v25, v28, 2
+; LMULMAX1-NEXT:    vslideup.vi v26, v27, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e16,m1,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v26, v25, 4
+; LMULMAX1-NEXT:    vslideup.vi v25, v26, 4
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
-; LMULMAX1-NEXT:    vse16.v v26, (a0)
+; LMULMAX1-NEXT:    vse16.v v25, (a0)
 ; LMULMAX1-NEXT:    ret
 ;
 ; LMULMAX4-LABEL: truncstore_v8i64_v8i16:
@@ -1776,91 +1776,91 @@ define void @truncstore_v16i64_v16i8(<16 x i64> %x, <16 x i8>* %z) {
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v25, v8, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v26, v25, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v27, v26, 0
+; LMULMAX1-NEXT:    vnsrl.wi v26, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf4,ta,mu
 ; LMULMAX1-NEXT:    vmv.v.i v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e8,mf4,tu,mu
-; LMULMAX1-NEXT:    vmv1r.v v28, v25
-; LMULMAX1-NEXT:    vslideup.vi v28, v27, 0
+; LMULMAX1-NEXT:    vmv1r.v v27, v25
+; LMULMAX1-NEXT:    vslideup.vi v27, v26, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v26, v9, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v27, v26, 0
+; LMULMAX1-NEXT:    vnsrl.wi v26, v26, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v26, v27, 0
+; LMULMAX1-NEXT:    vnsrl.wi v26, v26, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf4,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v28, v26, 2
+; LMULMAX1-NEXT:    vslideup.vi v27, v26, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8,mf2,ta,mu
 ; LMULMAX1-NEXT:    vmv.v.i v26, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf2,tu,mu
-; LMULMAX1-NEXT:    vmv1r.v v27, v26
-; LMULMAX1-NEXT:    vslideup.vi v27, v28, 0
+; LMULMAX1-NEXT:    vmv1r.v v28, v26
+; LMULMAX1-NEXT:    vslideup.vi v28, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v28, v10, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v10, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v29, v28, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v27, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v28, v29, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e8,mf4,tu,mu
 ; LMULMAX1-NEXT:    vmv1r.v v29, v25
-; LMULMAX1-NEXT:    vslideup.vi v29, v28, 0
+; LMULMAX1-NEXT:    vslideup.vi v29, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v28, v11, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v11, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v30, v28, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v27, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v28, v30, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf4,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v29, v28, 2
+; LMULMAX1-NEXT:    vslideup.vi v29, v27, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v27, v29, 4
+; LMULMAX1-NEXT:    vslideup.vi v28, v29, 4
 ; LMULMAX1-NEXT:    vsetivli zero, 16, e8,m1,ta,mu
-; LMULMAX1-NEXT:    vmv.v.i v28, 0
+; LMULMAX1-NEXT:    vmv.v.i v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8,m1,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v28, v27, 0
+; LMULMAX1-NEXT:    vslideup.vi v27, v28, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v27, v12, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v12, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v29, v27, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v28, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v27, v29, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v28, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e8,mf4,tu,mu
 ; LMULMAX1-NEXT:    vmv1r.v v29, v25
-; LMULMAX1-NEXT:    vslideup.vi v29, v27, 0
+; LMULMAX1-NEXT:    vslideup.vi v29, v28, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v27, v13, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v13, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v30, v27, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v28, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v27, v30, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v28, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf4,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v29, v27, 2
+; LMULMAX1-NEXT:    vslideup.vi v29, v28, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf2,tu,mu
 ; LMULMAX1-NEXT:    vslideup.vi v26, v29, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v27, v14, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v14, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v29, v27, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v28, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v27, v29, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v28, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e8,mf4,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v25, v27, 0
+; LMULMAX1-NEXT:    vslideup.vi v25, v28, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v27, v15, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v15, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v29, v27, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v28, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v27, v29, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v28, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf4,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v25, v27, 2
+; LMULMAX1-NEXT:    vslideup.vi v25, v28, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8,mf2,tu,mu
 ; LMULMAX1-NEXT:    vslideup.vi v26, v25, 4
 ; LMULMAX1-NEXT:    vsetivli zero, 16, e8,m1,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v28, v26, 8
+; LMULMAX1-NEXT:    vslideup.vi v27, v26, 8
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
-; LMULMAX1-NEXT:    vse8.v v28, (a0)
+; LMULMAX1-NEXT:    vse8.v v27, (a0)
 ; LMULMAX1-NEXT:    ret
 ;
 ; LMULMAX4-LABEL: truncstore_v16i64_v16i8:
@@ -1870,21 +1870,21 @@ define void @truncstore_v16i64_v16i8(<16 x i64> %x, <16 x i8>* %z) {
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; LMULMAX4-NEXT:    vnsrl.wi v25, v26, 0
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e8,mf2,ta,mu
-; LMULMAX4-NEXT:    vnsrl.wi v26, v25, 0
+; LMULMAX4-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX4-NEXT:    vsetivli zero, 16, e8,m1,ta,mu
-; LMULMAX4-NEXT:    vmv.v.i v25, 0
+; LMULMAX4-NEXT:    vmv.v.i v26, 0
 ; LMULMAX4-NEXT:    vsetivli zero, 8, e8,m1,tu,mu
-; LMULMAX4-NEXT:    vslideup.vi v25, v26, 0
+; LMULMAX4-NEXT:    vslideup.vi v26, v25, 0
 ; LMULMAX4-NEXT:    vsetivli zero, 8, e32,m2,ta,mu
-; LMULMAX4-NEXT:    vnsrl.wi v26, v12, 0
+; LMULMAX4-NEXT:    vnsrl.wi v28, v12, 0
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
-; LMULMAX4-NEXT:    vnsrl.wi v28, v26, 0
+; LMULMAX4-NEXT:    vnsrl.wi v25, v28, 0
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e8,mf2,ta,mu
-; LMULMAX4-NEXT:    vnsrl.wi v26, v28, 0
+; LMULMAX4-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX4-NEXT:    vsetivli zero, 16, e8,m1,tu,mu
-; LMULMAX4-NEXT:    vslideup.vi v25, v26, 8
+; LMULMAX4-NEXT:    vslideup.vi v26, v25, 8
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
-; LMULMAX4-NEXT:    vse8.v v25, (a0)
+; LMULMAX4-NEXT:    vse8.v v26, (a0)
 ; LMULMAX4-NEXT:    ret
   %y = trunc <16 x i64> %x to <16 x i8>
   store <16 x i8> %y, <16 x i8>* %z
@@ -1906,9 +1906,9 @@ define void @truncstore_v16i64_v16i16(<16 x i64> %x, <16 x i16>* %z) {
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v26, v9, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v27, v26, 0
+; LMULMAX1-NEXT:    vnsrl.wi v26, v26, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v28, v27, 2
+; LMULMAX1-NEXT:    vslideup.vi v28, v26, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e16,m1,ta,mu
 ; LMULMAX1-NEXT:    vmv.v.i v26, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16,m1,tu,mu
@@ -1917,45 +1917,45 @@ define void @truncstore_v16i64_v16i16(<16 x i64> %x, <16 x i16>* %z) {
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v28, v10, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v29, v28, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v28, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e16,mf2,tu,mu
-; LMULMAX1-NEXT:    vmv1r.v v28, v25
-; LMULMAX1-NEXT:    vslideup.vi v28, v29, 0
+; LMULMAX1-NEXT:    vmv1r.v v29, v25
+; LMULMAX1-NEXT:    vslideup.vi v29, v28, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v29, v11, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v11, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v30, v29, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v28, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v28, v30, 2
+; LMULMAX1-NEXT:    vslideup.vi v29, v28, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e16,m1,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v27, v28, 4
+; LMULMAX1-NEXT:    vslideup.vi v27, v29, 4
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v28, v12, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v29, v28, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v28, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e16,mf2,tu,mu
-; LMULMAX1-NEXT:    vmv1r.v v28, v25
-; LMULMAX1-NEXT:    vslideup.vi v28, v29, 0
+; LMULMAX1-NEXT:    vmv1r.v v29, v25
+; LMULMAX1-NEXT:    vslideup.vi v29, v28, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v29, v13, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v13, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v30, v29, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v28, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v28, v30, 2
+; LMULMAX1-NEXT:    vslideup.vi v29, v28, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16,m1,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v26, v28, 0
+; LMULMAX1-NEXT:    vslideup.vi v26, v29, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v28, v14, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v29, v28, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v28, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e16,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v25, v29, 0
+; LMULMAX1-NEXT:    vslideup.vi v25, v28, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v28, v15, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v29, v28, 0
+; LMULMAX1-NEXT:    vnsrl.wi v28, v28, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v25, v29, 2
+; LMULMAX1-NEXT:    vslideup.vi v25, v28, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e16,m1,tu,mu
 ; LMULMAX1-NEXT:    vslideup.vi v26, v25, 4
 ; LMULMAX1-NEXT:    addi a1, a0, 16

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
index be8efd4c53c3..25a38a692e26 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
@@ -374,8 +374,8 @@ define void @fp2si_v2f64_v2i8(<2 x double>* %x, <2 x i8>* %y) {
 ; CHECK-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vnsrl.wi v25, v26, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
-; CHECK-NEXT:    vse8.v v26, (a1)
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
+; CHECK-NEXT:    vse8.v v25, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x double>, <2 x double>* %x
   %d = fptosi <2 x double> %a to <2 x i8>
@@ -393,8 +393,8 @@ define void @fp2ui_v2f64_v2i8(<2 x double>* %x, <2 x i8>* %y) {
 ; CHECK-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vnsrl.wi v25, v26, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
-; CHECK-NEXT:    vse8.v v26, (a1)
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
+; CHECK-NEXT:    vse8.v v25, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x double>, <2 x double>* %x
   %d = fptoui <2 x double> %a to <2 x i8>
@@ -436,8 +436,8 @@ define void @fp2si_v8f64_v8i8(<8 x double>* %x, <8 x i8>* %y) {
 ; LMULMAX8-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; LMULMAX8-NEXT:    vnsrl.wi v25, v26, 0
 ; LMULMAX8-NEXT:    vsetvli zero, zero, e8,mf2,ta,mu
-; LMULMAX8-NEXT:    vnsrl.wi v26, v25, 0
-; LMULMAX8-NEXT:    vse8.v v26, (a1)
+; LMULMAX8-NEXT:    vnsrl.wi v25, v25, 0
+; LMULMAX8-NEXT:    vse8.v v25, (a1)
 ; LMULMAX8-NEXT:    ret
 ;
 ; LMULMAX1-LABEL: fp2si_v8f64_v8i8:
@@ -455,44 +455,44 @@ define void @fp2si_v8f64_v8i8(<8 x double>* %x, <8 x i8>* %y) {
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v27, v29, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v29, v27, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf4,ta,mu
-; LMULMAX1-NEXT:    vmv.v.i v27, 0
+; LMULMAX1-NEXT:    vmv.v.i v29, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e8,mf4,tu,mu
-; LMULMAX1-NEXT:    vmv1r.v v30, v27
-; LMULMAX1-NEXT:    vslideup.vi v30, v29, 0
+; LMULMAX1-NEXT:    vmv1r.v v30, v29
+; LMULMAX1-NEXT:    vslideup.vi v30, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT:    vfncvt.rtz.x.f.w v29, v28
+; LMULMAX1-NEXT:    vfncvt.rtz.x.f.w v27, v28
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v28, v29, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v27, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v29, v28, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf4,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v30, v29, 2
+; LMULMAX1-NEXT:    vslideup.vi v30, v27, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8,mf2,ta,mu
-; LMULMAX1-NEXT:    vmv.v.i v28, 0
+; LMULMAX1-NEXT:    vmv.v.i v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v28, v30, 0
+; LMULMAX1-NEXT:    vslideup.vi v27, v30, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT:    vfncvt.rtz.x.f.w v29, v26
+; LMULMAX1-NEXT:    vfncvt.rtz.x.f.w v28, v26
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v26, v29, 0
+; LMULMAX1-NEXT:    vnsrl.wi v26, v28, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v29, v26, 0
+; LMULMAX1-NEXT:    vnsrl.wi v26, v26, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e8,mf4,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v27, v29, 0
+; LMULMAX1-NEXT:    vslideup.vi v29, v26, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT:    vfncvt.rtz.x.f.w v26, v25
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v25, v26, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v26, v25, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf4,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v27, v26, 2
+; LMULMAX1-NEXT:    vslideup.vi v29, v25, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v28, v27, 4
+; LMULMAX1-NEXT:    vslideup.vi v27, v29, 4
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf2,ta,mu
-; LMULMAX1-NEXT:    vse8.v v28, (a1)
+; LMULMAX1-NEXT:    vse8.v v27, (a1)
 ; LMULMAX1-NEXT:    ret
   %a = load <8 x double>, <8 x double>* %x
   %d = fptosi <8 x double> %a to <8 x i8>
@@ -510,8 +510,8 @@ define void @fp2ui_v8f64_v8i8(<8 x double>* %x, <8 x i8>* %y) {
 ; LMULMAX8-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; LMULMAX8-NEXT:    vnsrl.wi v25, v26, 0
 ; LMULMAX8-NEXT:    vsetvli zero, zero, e8,mf2,ta,mu
-; LMULMAX8-NEXT:    vnsrl.wi v26, v25, 0
-; LMULMAX8-NEXT:    vse8.v v26, (a1)
+; LMULMAX8-NEXT:    vnsrl.wi v25, v25, 0
+; LMULMAX8-NEXT:    vse8.v v25, (a1)
 ; LMULMAX8-NEXT:    ret
 ;
 ; LMULMAX1-LABEL: fp2ui_v8f64_v8i8:
@@ -529,44 +529,44 @@ define void @fp2ui_v8f64_v8i8(<8 x double>* %x, <8 x i8>* %y) {
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v27, v29, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v29, v27, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf4,ta,mu
-; LMULMAX1-NEXT:    vmv.v.i v27, 0
+; LMULMAX1-NEXT:    vmv.v.i v29, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e8,mf4,tu,mu
-; LMULMAX1-NEXT:    vmv1r.v v30, v27
-; LMULMAX1-NEXT:    vslideup.vi v30, v29, 0
+; LMULMAX1-NEXT:    vmv1r.v v30, v29
+; LMULMAX1-NEXT:    vslideup.vi v30, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT:    vfncvt.rtz.xu.f.w v29, v28
+; LMULMAX1-NEXT:    vfncvt.rtz.xu.f.w v27, v28
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v28, v29, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v27, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v29, v28, 0
+; LMULMAX1-NEXT:    vnsrl.wi v27, v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf4,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v30, v29, 2
+; LMULMAX1-NEXT:    vslideup.vi v30, v27, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8,mf2,ta,mu
-; LMULMAX1-NEXT:    vmv.v.i v28, 0
+; LMULMAX1-NEXT:    vmv.v.i v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v28, v30, 0
+; LMULMAX1-NEXT:    vslideup.vi v27, v30, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
-; LMULMAX1-NEXT:    vfncvt.rtz.xu.f.w v29, v26
+; LMULMAX1-NEXT:    vfncvt.rtz.xu.f.w v28, v26
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v26, v29, 0
+; LMULMAX1-NEXT:    vnsrl.wi v26, v28, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v29, v26, 0
+; LMULMAX1-NEXT:    vnsrl.wi v26, v26, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e8,mf4,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v27, v29, 0
+; LMULMAX1-NEXT:    vslideup.vi v29, v26, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; LMULMAX1-NEXT:    vfncvt.rtz.xu.f.w v26, v25
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v25, v26, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v26, v25, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf4,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v27, v26, 2
+; LMULMAX1-NEXT:    vslideup.vi v29, v25, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v28, v27, 4
+; LMULMAX1-NEXT:    vslideup.vi v27, v29, 4
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf2,ta,mu
-; LMULMAX1-NEXT:    vse8.v v28, (a1)
+; LMULMAX1-NEXT:    vse8.v v27, (a1)
 ; LMULMAX1-NEXT:    ret
   %a = load <8 x double>, <8 x double>* %x
   %d = fptoui <8 x double> %a to <8 x i8>

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
index 30fb0b679d3a..8eaf607e960a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
@@ -169,9 +169,9 @@ define void @trunc_v4i8_v4i32(<4 x i32>* %x, <4 x i8>* %z) {
 ; CHECK-NEXT:    vsetivli zero, 4, e32,m1,ta,mu
 ; CHECK-NEXT:    vle32.v v25, (a0)
 ; CHECK-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v26, 0
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
 ; CHECK-NEXT:    vse8.v v25, (a1)
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, <4 x i32>* %x
@@ -188,8 +188,8 @@ define void @trunc_v8i8_v8i32(<8 x i32>* %x, <8 x i8>* %z) {
 ; LMULMAX8-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; LMULMAX8-NEXT:    vnsrl.wi v25, v26, 0
 ; LMULMAX8-NEXT:    vsetvli zero, zero, e8,mf2,ta,mu
-; LMULMAX8-NEXT:    vnsrl.wi v26, v25, 0
-; LMULMAX8-NEXT:    vse8.v v26, (a1)
+; LMULMAX8-NEXT:    vnsrl.wi v25, v25, 0
+; LMULMAX8-NEXT:    vse8.v v25, (a1)
 ; LMULMAX8-NEXT:    ret
 ;
 ; LMULMAX2-LABEL: trunc_v8i8_v8i32:
@@ -199,8 +199,8 @@ define void @trunc_v8i8_v8i32(<8 x i32>* %x, <8 x i8>* %z) {
 ; LMULMAX2-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; LMULMAX2-NEXT:    vnsrl.wi v25, v26, 0
 ; LMULMAX2-NEXT:    vsetvli zero, zero, e8,mf2,ta,mu
-; LMULMAX2-NEXT:    vnsrl.wi v26, v25, 0
-; LMULMAX2-NEXT:    vse8.v v26, (a1)
+; LMULMAX2-NEXT:    vnsrl.wi v25, v25, 0
+; LMULMAX2-NEXT:    vse8.v v25, (a1)
 ; LMULMAX2-NEXT:    ret
 ;
 ; LMULMAX1-LABEL: trunc_v8i8_v8i32:
@@ -210,9 +210,9 @@ define void @trunc_v8i8_v8i32(<8 x i32>* %x, <8 x i8>* %z) {
 ; LMULMAX1-NEXT:    addi a0, a0, 16
 ; LMULMAX1-NEXT:    vle32.v v26, (a0)
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v27, v25, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v25, v27, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8,mf2,ta,mu
 ; LMULMAX1-NEXT:    vmv.v.i v27, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8,mf2,tu,mu
@@ -220,9 +220,9 @@ define void @trunc_v8i8_v8i32(<8 x i32>* %x, <8 x i8>* %z) {
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16,mf2,ta,mu
 ; LMULMAX1-NEXT:    vnsrl.wi v25, v26, 0
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; LMULMAX1-NEXT:    vnsrl.wi v26, v25, 0
+; LMULMAX1-NEXT:    vnsrl.wi v25, v25, 0
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8,mf2,tu,mu
-; LMULMAX1-NEXT:    vslideup.vi v27, v26, 4
+; LMULMAX1-NEXT:    vslideup.vi v27, v25, 4
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8,mf2,ta,mu
 ; LMULMAX1-NEXT:    vse8.v v27, (a1)
 ; LMULMAX1-NEXT:    ret

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index a7b6c3ab1dc5..6c1fb6a50ecb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -65,8 +65,8 @@ define void @mscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x i8*> %ptrs, <2
 ; RV32-NEXT:    vsetivli zero, 2, e16,mf4,ta,mu
 ; RV32-NEXT:    vnsrl.wi v25, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; RV32-NEXT:    vnsrl.wi v26, v25, 0
-; RV32-NEXT:    vsoxei32.v v26, (zero), v9, v0.t
+; RV32-NEXT:    vnsrl.wi v25, v25, 0
+; RV32-NEXT:    vsoxei32.v v25, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v2i32_truncstore_v2i8:
@@ -74,8 +74,8 @@ define void @mscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x i8*> %ptrs, <2
 ; RV64-NEXT:    vsetivli zero, 2, e16,mf4,ta,mu
 ; RV64-NEXT:    vnsrl.wi v25, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; RV64-NEXT:    vnsrl.wi v26, v25, 0
-; RV64-NEXT:    vsoxei64.v v26, (zero), v9, v0.t
+; RV64-NEXT:    vnsrl.wi v25, v25, 0
+; RV64-NEXT:    vsoxei64.v v25, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <2 x i32> %val to <2 x i8>
   call void @llvm.masked.scatter.v2i8.v2p0i8(<2 x i8> %tval, <2 x i8*> %ptrs, i32 1, <2 x i1> %m)
@@ -88,9 +88,9 @@ define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x i8*> %ptrs, <2
 ; RV32-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; RV32-NEXT:    vnsrl.wi v25, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; RV32-NEXT:    vnsrl.wi v26, v25, 0
+; RV32-NEXT:    vnsrl.wi v25, v25, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; RV32-NEXT:    vnsrl.wi v25, v26, 0
+; RV32-NEXT:    vnsrl.wi v25, v25, 0
 ; RV32-NEXT:    vsoxei32.v v25, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -99,9 +99,9 @@ define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x i8*> %ptrs, <2
 ; RV64-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; RV64-NEXT:    vnsrl.wi v25, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; RV64-NEXT:    vnsrl.wi v26, v25, 0
+; RV64-NEXT:    vnsrl.wi v25, v25, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; RV64-NEXT:    vnsrl.wi v25, v26, 0
+; RV64-NEXT:    vnsrl.wi v25, v25, 0
 ; RV64-NEXT:    vsoxei64.v v25, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <2 x i64> %val to <2 x i8>
@@ -257,8 +257,8 @@ define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x i16*> %ptrs, <
 ; RV32-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; RV32-NEXT:    vnsrl.wi v25, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; RV32-NEXT:    vnsrl.wi v26, v25, 0
-; RV32-NEXT:    vsoxei32.v v26, (zero), v9, v0.t
+; RV32-NEXT:    vnsrl.wi v25, v25, 0
+; RV32-NEXT:    vsoxei32.v v25, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v2i64_truncstore_v2i16:
@@ -266,8 +266,8 @@ define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x i16*> %ptrs, <
 ; RV64-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
 ; RV64-NEXT:    vnsrl.wi v25, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; RV64-NEXT:    vnsrl.wi v26, v25, 0
-; RV64-NEXT:    vsoxei64.v v26, (zero), v9, v0.t
+; RV64-NEXT:    vnsrl.wi v25, v25, 0
+; RV64-NEXT:    vsoxei64.v v25, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <2 x i64> %val to <2 x i16>
   call void @llvm.masked.scatter.v2i16.v2p0i16(<2 x i16> %tval, <2 x i16*> %ptrs, i32 2, <2 x i1> %m)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll
index b2a72e1a4e95..d0505d14d1d0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll
@@ -6,8 +6,7 @@ define <8 x i8> @vnsra_v8i16_v8i8_scalar(<8 x i16> %x, i16 %y) {
 ; CHECK-LABEL: vnsra_v8i16_v8i8_scalar:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %insert = insertelement <8 x i16> undef, i16 %y, i16 0
   %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
@@ -20,8 +19,7 @@ define <4 x i16> @vnsra_v4i32_v4i16_scalar(<4 x i32> %x, i32 %y) {
 ; CHECK-LABEL: vnsra_v4i32_v4i16_scalar:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %insert = insertelement <4 x i32> undef, i32 %y, i32 0
   %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -34,8 +32,7 @@ define <2 x i32> @vnsra_v2i64_v2i32_scalar(<2 x i64> %x, i64 %y) {
 ; CHECK-LABEL: vnsra_v2i64_v2i32_scalar:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 ; RV32-LABEL: vnsra_v2i64_v2i32_scalar:
 ; RV32:       # %bb.0:
@@ -68,8 +65,7 @@ define <8 x i8> @vnsra_v8i16_v8i8_imm(<8 x i16> %x) {
 ; CHECK-LABEL: vnsra_v8i16_v8i8_imm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 8
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 8
 ; CHECK-NEXT:    ret
   %a = ashr <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8,i16 8, i16 8, i16 8, i16 8>
   %b = trunc <8 x i16> %a to <8 x i8>
@@ -80,8 +76,7 @@ define <4 x i16> @vnsra_v4i32_v4i16_imm(<4 x i32> %x) {
 ; CHECK-LABEL: vnsra_v4i32_v4i16_imm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 16
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 16
 ; CHECK-NEXT:    ret
   %a = ashr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
   %b = trunc <4 x i32> %a to <4 x i16>
@@ -92,8 +87,7 @@ define <2 x i32> @vnsra_v2i64_v2i32_imm(<2 x i64> %x) {
 ; CHECK-LABEL: vnsra_v2i64_v2i32_imm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 31
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 31
 ; CHECK-NEXT:    ret
   %a = ashr <2 x i64> %x, <i64 31, i64 31>
   %b = trunc <2 x i64> %a to <2 x i32>
@@ -104,8 +98,7 @@ define <8 x i8> @vnsrl_v8i16_v8i8_scalar(<8 x i16> %x, i16 %y) {
 ; CHECK-LABEL: vnsrl_v8i16_v8i8_scalar:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %insert = insertelement <8 x i16> undef, i16 %y, i16 0
   %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
@@ -118,8 +111,7 @@ define <4 x i16> @vnsrl_v4i32_v4i16_scalar(<4 x i32> %x, i32 %y) {
 ; CHECK-LABEL: vnsrl_v4i32_v4i16_scalar:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
   %insert = insertelement <4 x i32> undef, i32 %y, i32 0
   %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -132,8 +124,7 @@ define <2 x i32> @vnsrl_v2i64_v2i32_scalar(<2 x i64> %x, i64 %y) {
 ; CHECK-LABEL: vnsrl_v2i64_v2i32_scalar:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 ; RV32-LABEL: vnsrl_v2i64_v2i32_scalar:
 ; RV32:       # %bb.0:
@@ -166,8 +157,7 @@ define <8 x i8> @vnsrl_v8i16_v8i8_imm(<8 x i16> %x) {
 ; CHECK-LABEL: vnsrl_v8i16_v8i8_imm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 8
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 8
 ; CHECK-NEXT:    ret
   %a = lshr <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8,i16 8, i16 8, i16 8, i16 8>
   %b = trunc <8 x i16> %a to <8 x i8>
@@ -178,8 +168,7 @@ define <4 x i16> @vnsrl_v4i32_v4i16_imm(<4 x i32> %x) {
 ; CHECK-LABEL: vnsrl_v4i32_v4i16_imm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 16
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 16
 ; CHECK-NEXT:    ret
   %a = lshr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
   %b = trunc <4 x i32> %a to <4 x i16>
@@ -190,8 +179,7 @@ define <2 x i32> @vnsrl_v2i64_v2i32_imm(<2 x i64> %x) {
 ; CHECK-LABEL: vnsrl_v2i64_v2i32_imm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 31
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 31
 ; CHECK-NEXT:    ret
   %a = lshr <2 x i64> %x, <i64 31, i64 31>
   %b = trunc <2 x i64> %a to <2 x i32>

diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
index a95d3dced0f0..342fa20c560e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
@@ -65,8 +65,8 @@ define void @mscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale
 ; RV32-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; RV32-NEXT:    vnsrl.wi v25, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; RV32-NEXT:    vnsrl.wi v26, v25, 0
-; RV32-NEXT:    vsoxei32.v v26, (zero), v9, v0.t
+; RV32-NEXT:    vnsrl.wi v25, v25, 0
+; RV32-NEXT:    vsoxei32.v v25, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
@@ -74,8 +74,8 @@ define void @mscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale
 ; RV64-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
 ; RV64-NEXT:    vnsrl.wi v25, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; RV64-NEXT:    vnsrl.wi v26, v25, 0
-; RV64-NEXT:    vsoxei64.v v26, (zero), v10, v0.t
+; RV64-NEXT:    vnsrl.wi v25, v25, 0
+; RV64-NEXT:    vsoxei64.v v25, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
   call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
@@ -88,9 +88,9 @@ define void @mscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale
 ; RV32-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
 ; RV32-NEXT:    vnsrl.wi v25, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
-; RV32-NEXT:    vnsrl.wi v26, v25, 0
+; RV32-NEXT:    vnsrl.wi v25, v25, 0
 ; RV32-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; RV32-NEXT:    vnsrl.wi v25, v26, 0
+; RV32-NEXT:    vnsrl.wi v25, v25, 0
 ; RV32-NEXT:    vsoxei32.v v25, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
@@ -99,9 +99,9 @@ define void @mscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale
 ; RV64-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
 ; RV64-NEXT:    vnsrl.wi v25, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
-; RV64-NEXT:    vnsrl.wi v26, v25, 0
+; RV64-NEXT:    vnsrl.wi v25, v25, 0
 ; RV64-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; RV64-NEXT:    vnsrl.wi v25, v26, 0
+; RV64-NEXT:    vnsrl.wi v25, v25, 0
 ; RV64-NEXT:    vsoxei64.v v25, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
@@ -257,8 +257,8 @@ define void @mscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscal
 ; RV32-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
 ; RV32-NEXT:    vnsrl.wi v25, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
-; RV32-NEXT:    vnsrl.wi v26, v25, 0
-; RV32-NEXT:    vsoxei32.v v26, (zero), v10, v0.t
+; RV32-NEXT:    vnsrl.wi v25, v25, 0
+; RV32-NEXT:    vsoxei32.v v25, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
@@ -266,8 +266,8 @@ define void @mscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscal
 ; RV64-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
 ; RV64-NEXT:    vnsrl.wi v25, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
-; RV64-NEXT:    vnsrl.wi v26, v25, 0
-; RV64-NEXT:    vsoxei64.v v26, (zero), v10, v0.t
+; RV64-NEXT:    vnsrl.wi v25, v25, 0
+; RV64-NEXT:    vsoxei64.v v25, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
   call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %tval, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
index 3042ed4db0ff..3c75fa76b45d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
@@ -1174,9 +1174,9 @@ define <vscale x 1 x i8> @vfptosi_nxv1f64_nxv1i8(<vscale x 1 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vfncvt.rtz.x.f.w v25, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsrl.wi v8, v26, 0
+; CHECK-NEXT:    vnsrl.wi v8, v25, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 1 x double> %va to <vscale x 1 x i8>
   ret <vscale x 1 x i8> %evec
@@ -1188,9 +1188,9 @@ define <vscale x 1 x i8> @vfptoui_nxv1f64_nxv1i8(<vscale x 1 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vfncvt.rtz.xu.f.w v25, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsrl.wi v8, v26, 0
+; CHECK-NEXT:    vnsrl.wi v8, v25, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 1 x double> %va to <vscale x 1 x i8>
   ret <vscale x 1 x i8> %evec
@@ -1292,9 +1292,9 @@ define <vscale x 2 x i8> @vfptosi_nxv2f64_nxv2i8(<vscale x 2 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfncvt.rtz.x.f.w v25, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v8, v26, 0
+; CHECK-NEXT:    vnsrl.wi v8, v25, 0
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 2 x double> %va to <vscale x 2 x i8>
   ret <vscale x 2 x i8> %evec
@@ -1306,9 +1306,9 @@ define <vscale x 2 x i8> @vfptoui_nxv2f64_nxv2i8(<vscale x 2 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vfncvt.rtz.xu.f.w v25, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v8, v26, 0
+; CHECK-NEXT:    vnsrl.wi v8, v25, 0
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 2 x double> %va to <vscale x 2 x i8>
   ret <vscale x 2 x i8> %evec

diff --git a/llvm/test/CodeGen/RISCV/rvv/vnclip-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnclip-rv32.ll
index 856afb8989c6..205371fc8ae0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnclip-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnclip-rv32.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x i8> @intrinsic_vnclip_wv_nxv1i8_nxv1i16_nxv1i8(<vscale x 1
 ; CHECK-LABEL: intrinsic_vnclip_wv_nxv1i8_nxv1i16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT:    vnclip.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnclip.nxv1i8.nxv1i16.nxv1i8(
@@ -55,8 +54,7 @@ define <vscale x 2 x i8> @intrinsic_vnclip_wv_nxv2i8_nxv2i16_nxv2i8(<vscale x 2
 ; CHECK-LABEL: intrinsic_vnclip_wv_nxv2i8_nxv2i16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT:    vnclip.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnclip.nxv2i8.nxv2i16.nxv2i8(
@@ -100,8 +98,7 @@ define <vscale x 4 x i8> @intrinsic_vnclip_wv_nxv4i8_nxv4i16_nxv4i8(<vscale x 4
 ; CHECK-LABEL: intrinsic_vnclip_wv_nxv4i8_nxv4i16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT:    vnclip.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnclip.nxv4i8.nxv4i16.nxv4i8(
@@ -280,8 +277,7 @@ define <vscale x 1 x i16> @intrinsic_vnclip_wv_nxv1i16_nxv1i32_nxv1i16(<vscale x
 ; CHECK-LABEL: intrinsic_vnclip_wv_nxv1i16_nxv1i32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vnclip.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnclip.nxv1i16.nxv1i32.nxv1i16(
@@ -325,8 +321,7 @@ define <vscale x 2 x i16> @intrinsic_vnclip_wv_nxv2i16_nxv2i32_nxv2i16(<vscale x
 ; CHECK-LABEL: intrinsic_vnclip_wv_nxv2i16_nxv2i32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vnclip.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32.nxv2i16(
@@ -505,8 +500,7 @@ define <vscale x 1 x i32> @intrinsic_vnclip_wv_nxv1i32_nxv1i64_nxv1i32(<vscale x
 ; CHECK-LABEL: intrinsic_vnclip_wv_nxv1i32_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vnclip.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnclip.nxv1i32.nxv1i64.nxv1i32(
@@ -685,8 +679,7 @@ define <vscale x 1 x i8> @intrinsic_vnclip_vx_nxv1i8_nxv1i16(<vscale x 1 x i16>
 ; CHECK-LABEL: intrinsic_vnclip_vx_nxv1i8_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf8,ta,mu
-; CHECK-NEXT:    vnclip.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnclip.nxv1i8.nxv1i16(
@@ -730,8 +723,7 @@ define <vscale x 2 x i8> @intrinsic_vnclip_vx_nxv2i8_nxv2i16(<vscale x 2 x i16>
 ; CHECK-LABEL: intrinsic_vnclip_vx_nxv2i8_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf4,ta,mu
-; CHECK-NEXT:    vnclip.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnclip.nxv2i8.nxv2i16(
@@ -775,8 +767,7 @@ define <vscale x 4 x i8> @intrinsic_vnclip_vx_nxv4i8_nxv4i16(<vscale x 4 x i16>
 ; CHECK-LABEL: intrinsic_vnclip_vx_nxv4i8_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf2,ta,mu
-; CHECK-NEXT:    vnclip.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnclip.nxv4i8.nxv4i16(
@@ -955,8 +946,7 @@ define <vscale x 1 x i16> @intrinsic_vnclip_vx_nxv1i16_nxv1i32(<vscale x 1 x i32
 ; CHECK-LABEL: intrinsic_vnclip_vx_nxv1i16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vnclip.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnclip.nxv1i16.nxv1i32(
@@ -1000,8 +990,7 @@ define <vscale x 2 x i16> @intrinsic_vnclip_vx_nxv2i16_nxv2i32(<vscale x 2 x i32
 ; CHECK-LABEL: intrinsic_vnclip_vx_nxv2i16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vnclip.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32(
@@ -1180,8 +1169,7 @@ define <vscale x 1 x i32> @intrinsic_vnclip_vx_nxv1i32_nxv1i64(<vscale x 1 x i64
 ; CHECK-LABEL: intrinsic_vnclip_vx_nxv1i32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vnclip.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnclip.nxv1i32.nxv1i64(
@@ -1355,8 +1343,7 @@ define <vscale x 1 x i8> @intrinsic_vnclip_vi_nxv1i8_nxv1i16_i8(<vscale x 1 x i1
 ; CHECK-LABEL: intrinsic_vnclip_vi_nxv1i8_nxv1i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT:    vnclip.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnclip.nxv1i8.nxv1i16(
@@ -1388,8 +1375,7 @@ define <vscale x 2 x i8> @intrinsic_vnclip_vi_nxv2i8_nxv2i16_i8(<vscale x 2 x i1
 ; CHECK-LABEL: intrinsic_vnclip_vi_nxv2i8_nxv2i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT:    vnclip.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnclip.nxv2i8.nxv2i16(
@@ -1421,8 +1407,7 @@ define <vscale x 4 x i8> @intrinsic_vnclip_vi_nxv4i8_nxv4i16_i8(<vscale x 4 x i1
 ; CHECK-LABEL: intrinsic_vnclip_vi_nxv4i8_nxv4i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT:    vnclip.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnclip.nxv4i8.nxv4i16(
@@ -1553,8 +1538,7 @@ define <vscale x 1 x i16> @intrinsic_vnclip_vi_nxv1i16_nxv1i32_i16(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vnclip_vi_nxv1i16_nxv1i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vnclip.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnclip.nxv1i16.nxv1i32(
@@ -1586,8 +1570,7 @@ define <vscale x 2 x i16> @intrinsic_vnclip_vi_nxv2i16_nxv2i32_i16(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vnclip_vi_nxv2i16_nxv2i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vnclip.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32(
@@ -1718,8 +1701,7 @@ define <vscale x 1 x i32> @intrinsic_vnclip_vi_nxv1i32_nxv1i64_i32(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vnclip_vi_nxv1i32_nxv1i64_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vnclip.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnclip.nxv1i32.nxv1i64(

diff --git a/llvm/test/CodeGen/RISCV/rvv/vnclip-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vnclip-rv64.ll
index 5763be6d0c19..fd6422147c9a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnclip-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnclip-rv64.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x i8> @intrinsic_vnclip_wv_nxv1i8_nxv1i16_nxv1i8(<vscale x 1
 ; CHECK-LABEL: intrinsic_vnclip_wv_nxv1i8_nxv1i16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT:    vnclip.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnclip.nxv1i8.nxv1i16.nxv1i8(
@@ -55,8 +54,7 @@ define <vscale x 2 x i8> @intrinsic_vnclip_wv_nxv2i8_nxv2i16_nxv2i8(<vscale x 2
 ; CHECK-LABEL: intrinsic_vnclip_wv_nxv2i8_nxv2i16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT:    vnclip.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnclip.nxv2i8.nxv2i16.nxv2i8(
@@ -100,8 +98,7 @@ define <vscale x 4 x i8> @intrinsic_vnclip_wv_nxv4i8_nxv4i16_nxv4i8(<vscale x 4
 ; CHECK-LABEL: intrinsic_vnclip_wv_nxv4i8_nxv4i16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT:    vnclip.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnclip.nxv4i8.nxv4i16.nxv4i8(
@@ -280,8 +277,7 @@ define <vscale x 1 x i16> @intrinsic_vnclip_wv_nxv1i16_nxv1i32_nxv1i16(<vscale x
 ; CHECK-LABEL: intrinsic_vnclip_wv_nxv1i16_nxv1i32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vnclip.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnclip.nxv1i16.nxv1i32.nxv1i16(
@@ -325,8 +321,7 @@ define <vscale x 2 x i16> @intrinsic_vnclip_wv_nxv2i16_nxv2i32_nxv2i16(<vscale x
 ; CHECK-LABEL: intrinsic_vnclip_wv_nxv2i16_nxv2i32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vnclip.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32.nxv2i16(
@@ -505,8 +500,7 @@ define <vscale x 1 x i32> @intrinsic_vnclip_wv_nxv1i32_nxv1i64_nxv1i32(<vscale x
 ; CHECK-LABEL: intrinsic_vnclip_wv_nxv1i32_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vnclip.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnclip.nxv1i32.nxv1i64.nxv1i32(
@@ -685,8 +679,7 @@ define <vscale x 1 x i8> @intrinsic_vnclip_vx_nxv1i8_nxv1i16(<vscale x 1 x i16>
 ; CHECK-LABEL: intrinsic_vnclip_vx_nxv1i8_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf8,ta,mu
-; CHECK-NEXT:    vnclip.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnclip.nxv1i8.nxv1i16(
@@ -730,8 +723,7 @@ define <vscale x 2 x i8> @intrinsic_vnclip_vx_nxv2i8_nxv2i16(<vscale x 2 x i16>
 ; CHECK-LABEL: intrinsic_vnclip_vx_nxv2i8_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf4,ta,mu
-; CHECK-NEXT:    vnclip.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnclip.nxv2i8.nxv2i16(
@@ -775,8 +767,7 @@ define <vscale x 4 x i8> @intrinsic_vnclip_vx_nxv4i8_nxv4i16(<vscale x 4 x i16>
 ; CHECK-LABEL: intrinsic_vnclip_vx_nxv4i8_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf2,ta,mu
-; CHECK-NEXT:    vnclip.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnclip.nxv4i8.nxv4i16(
@@ -955,8 +946,7 @@ define <vscale x 1 x i16> @intrinsic_vnclip_vx_nxv1i16_nxv1i32(<vscale x 1 x i32
 ; CHECK-LABEL: intrinsic_vnclip_vx_nxv1i16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vnclip.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnclip.nxv1i16.nxv1i32(
@@ -1000,8 +990,7 @@ define <vscale x 2 x i16> @intrinsic_vnclip_vx_nxv2i16_nxv2i32(<vscale x 2 x i32
 ; CHECK-LABEL: intrinsic_vnclip_vx_nxv2i16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vnclip.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32(
@@ -1180,8 +1169,7 @@ define <vscale x 1 x i32> @intrinsic_vnclip_vx_nxv1i32_nxv1i64(<vscale x 1 x i64
 ; CHECK-LABEL: intrinsic_vnclip_vx_nxv1i32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vnclip.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnclip.nxv1i32.nxv1i64(
@@ -1355,8 +1343,7 @@ define <vscale x 1 x i8> @intrinsic_vnclip_vi_nxv1i8_nxv1i16_i8(<vscale x 1 x i1
 ; CHECK-LABEL: intrinsic_vnclip_vi_nxv1i8_nxv1i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT:    vnclip.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnclip.nxv1i8.nxv1i16(
@@ -1388,8 +1375,7 @@ define <vscale x 2 x i8> @intrinsic_vnclip_vi_nxv2i8_nxv2i16_i8(<vscale x 2 x i1
 ; CHECK-LABEL: intrinsic_vnclip_vi_nxv2i8_nxv2i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT:    vnclip.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnclip.nxv2i8.nxv2i16(
@@ -1421,8 +1407,7 @@ define <vscale x 4 x i8> @intrinsic_vnclip_vi_nxv4i8_nxv4i16_i8(<vscale x 4 x i1
 ; CHECK-LABEL: intrinsic_vnclip_vi_nxv4i8_nxv4i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT:    vnclip.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnclip.nxv4i8.nxv4i16(
@@ -1553,8 +1538,7 @@ define <vscale x 1 x i16> @intrinsic_vnclip_vi_nxv1i16_nxv1i32_i16(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vnclip_vi_nxv1i16_nxv1i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vnclip.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnclip.nxv1i16.nxv1i32(
@@ -1586,8 +1570,7 @@ define <vscale x 2 x i16> @intrinsic_vnclip_vi_nxv2i16_nxv2i32_i16(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vnclip_vi_nxv2i16_nxv2i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vnclip.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32(
@@ -1718,8 +1701,7 @@ define <vscale x 1 x i32> @intrinsic_vnclip_vi_nxv1i32_nxv1i64_i32(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vnclip_vi_nxv1i32_nxv1i64_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vnclip.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclip.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnclip.nxv1i32.nxv1i64(

diff --git a/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv32.ll
index fbb1315d57bf..ee9f78ddf96c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv32.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x i8> @intrinsic_vnclipu_wv_nxv1i8_nxv1i16_nxv1i8(<vscale x 1
 ; CHECK-LABEL: intrinsic_vnclipu_wv_nxv1i8_nxv1i16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT:    vnclipu.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnclipu.nxv1i8.nxv1i16.nxv1i8(
@@ -55,8 +54,7 @@ define <vscale x 2 x i8> @intrinsic_vnclipu_wv_nxv2i8_nxv2i16_nxv2i8(<vscale x 2
 ; CHECK-LABEL: intrinsic_vnclipu_wv_nxv2i8_nxv2i16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT:    vnclipu.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnclipu.nxv2i8.nxv2i16.nxv2i8(
@@ -100,8 +98,7 @@ define <vscale x 4 x i8> @intrinsic_vnclipu_wv_nxv4i8_nxv4i16_nxv4i8(<vscale x 4
 ; CHECK-LABEL: intrinsic_vnclipu_wv_nxv4i8_nxv4i16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT:    vnclipu.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnclipu.nxv4i8.nxv4i16.nxv4i8(
@@ -280,8 +277,7 @@ define <vscale x 1 x i16> @intrinsic_vnclipu_wv_nxv1i16_nxv1i32_nxv1i16(<vscale
 ; CHECK-LABEL: intrinsic_vnclipu_wv_nxv1i16_nxv1i32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vnclipu.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnclipu.nxv1i16.nxv1i32.nxv1i16(
@@ -325,8 +321,7 @@ define <vscale x 2 x i16> @intrinsic_vnclipu_wv_nxv2i16_nxv2i32_nxv2i16(<vscale
 ; CHECK-LABEL: intrinsic_vnclipu_wv_nxv2i16_nxv2i32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vnclipu.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnclipu.nxv2i16.nxv2i32.nxv2i16(
@@ -505,8 +500,7 @@ define <vscale x 1 x i32> @intrinsic_vnclipu_wv_nxv1i32_nxv1i64_nxv1i32(<vscale
 ; CHECK-LABEL: intrinsic_vnclipu_wv_nxv1i32_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vnclipu.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnclipu.nxv1i32.nxv1i64.nxv1i32(
@@ -685,8 +679,7 @@ define <vscale x 1 x i8> @intrinsic_vnclipu_vx_nxv1i8_nxv1i16(<vscale x 1 x i16>
 ; CHECK-LABEL: intrinsic_vnclipu_vx_nxv1i8_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf8,ta,mu
-; CHECK-NEXT:    vnclipu.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnclipu.nxv1i8.nxv1i16(
@@ -730,8 +723,7 @@ define <vscale x 2 x i8> @intrinsic_vnclipu_vx_nxv2i8_nxv2i16(<vscale x 2 x i16>
 ; CHECK-LABEL: intrinsic_vnclipu_vx_nxv2i8_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf4,ta,mu
-; CHECK-NEXT:    vnclipu.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnclipu.nxv2i8.nxv2i16(
@@ -775,8 +767,7 @@ define <vscale x 4 x i8> @intrinsic_vnclipu_vx_nxv4i8_nxv4i16(<vscale x 4 x i16>
 ; CHECK-LABEL: intrinsic_vnclipu_vx_nxv4i8_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf2,ta,mu
-; CHECK-NEXT:    vnclipu.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnclipu.nxv4i8.nxv4i16(
@@ -955,8 +946,7 @@ define <vscale x 1 x i16> @intrinsic_vnclipu_vx_nxv1i16_nxv1i32(<vscale x 1 x i3
 ; CHECK-LABEL: intrinsic_vnclipu_vx_nxv1i16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vnclipu.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnclipu.nxv1i16.nxv1i32(
@@ -1000,8 +990,7 @@ define <vscale x 2 x i16> @intrinsic_vnclipu_vx_nxv2i16_nxv2i32(<vscale x 2 x i3
 ; CHECK-LABEL: intrinsic_vnclipu_vx_nxv2i16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vnclipu.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnclipu.nxv2i16.nxv2i32(
@@ -1180,8 +1169,7 @@ define <vscale x 1 x i32> @intrinsic_vnclipu_vx_nxv1i32_nxv1i64(<vscale x 1 x i6
 ; CHECK-LABEL: intrinsic_vnclipu_vx_nxv1i32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vnclipu.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnclipu.nxv1i32.nxv1i64(
@@ -1355,8 +1343,7 @@ define <vscale x 1 x i8> @intrinsic_vnclipu_vi_nxv1i8_nxv1i16_i8(<vscale x 1 x i
 ; CHECK-LABEL: intrinsic_vnclipu_vi_nxv1i8_nxv1i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT:    vnclipu.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnclipu.nxv1i8.nxv1i16(
@@ -1388,8 +1375,7 @@ define <vscale x 2 x i8> @intrinsic_vnclipu_vi_nxv2i8_nxv2i16_i8(<vscale x 2 x i
 ; CHECK-LABEL: intrinsic_vnclipu_vi_nxv2i8_nxv2i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT:    vnclipu.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnclipu.nxv2i8.nxv2i16(
@@ -1421,8 +1407,7 @@ define <vscale x 4 x i8> @intrinsic_vnclipu_vi_nxv4i8_nxv4i16_i8(<vscale x 4 x i
 ; CHECK-LABEL: intrinsic_vnclipu_vi_nxv4i8_nxv4i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT:    vnclipu.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnclipu.nxv4i8.nxv4i16(
@@ -1553,8 +1538,7 @@ define <vscale x 1 x i16> @intrinsic_vnclipu_vi_nxv1i16_nxv1i32_i16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vnclipu_vi_nxv1i16_nxv1i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vnclipu.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnclipu.nxv1i16.nxv1i32(
@@ -1586,8 +1570,7 @@ define <vscale x 2 x i16> @intrinsic_vnclipu_vi_nxv2i16_nxv2i32_i16(<vscale x 2
 ; CHECK-LABEL: intrinsic_vnclipu_vi_nxv2i16_nxv2i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vnclipu.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnclipu.nxv2i16.nxv2i32(
@@ -1718,8 +1701,7 @@ define <vscale x 1 x i32> @intrinsic_vnclipu_vi_nxv1i32_nxv1i64_i32(<vscale x 1
 ; CHECK-LABEL: intrinsic_vnclipu_vi_nxv1i32_nxv1i64_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vnclipu.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnclipu.nxv1i32.nxv1i64(

diff --git a/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv64.ll
index 15c230c3012b..7844e529215f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv64.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x i8> @intrinsic_vnclipu_wv_nxv1i8_nxv1i16_nxv1i8(<vscale x 1
 ; CHECK-LABEL: intrinsic_vnclipu_wv_nxv1i8_nxv1i16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT:    vnclipu.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnclipu.nxv1i8.nxv1i16.nxv1i8(
@@ -55,8 +54,7 @@ define <vscale x 2 x i8> @intrinsic_vnclipu_wv_nxv2i8_nxv2i16_nxv2i8(<vscale x 2
 ; CHECK-LABEL: intrinsic_vnclipu_wv_nxv2i8_nxv2i16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT:    vnclipu.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnclipu.nxv2i8.nxv2i16.nxv2i8(
@@ -100,8 +98,7 @@ define <vscale x 4 x i8> @intrinsic_vnclipu_wv_nxv4i8_nxv4i16_nxv4i8(<vscale x 4
 ; CHECK-LABEL: intrinsic_vnclipu_wv_nxv4i8_nxv4i16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT:    vnclipu.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnclipu.nxv4i8.nxv4i16.nxv4i8(
@@ -280,8 +277,7 @@ define <vscale x 1 x i16> @intrinsic_vnclipu_wv_nxv1i16_nxv1i32_nxv1i16(<vscale
 ; CHECK-LABEL: intrinsic_vnclipu_wv_nxv1i16_nxv1i32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vnclipu.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnclipu.nxv1i16.nxv1i32.nxv1i16(
@@ -325,8 +321,7 @@ define <vscale x 2 x i16> @intrinsic_vnclipu_wv_nxv2i16_nxv2i32_nxv2i16(<vscale
 ; CHECK-LABEL: intrinsic_vnclipu_wv_nxv2i16_nxv2i32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vnclipu.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnclipu.nxv2i16.nxv2i32.nxv2i16(
@@ -505,8 +500,7 @@ define <vscale x 1 x i32> @intrinsic_vnclipu_wv_nxv1i32_nxv1i64_nxv1i32(<vscale
 ; CHECK-LABEL: intrinsic_vnclipu_wv_nxv1i32_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vnclipu.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnclipu.nxv1i32.nxv1i64.nxv1i32(
@@ -685,8 +679,7 @@ define <vscale x 1 x i8> @intrinsic_vnclipu_vx_nxv1i8_nxv1i16(<vscale x 1 x i16>
 ; CHECK-LABEL: intrinsic_vnclipu_vx_nxv1i8_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf8,ta,mu
-; CHECK-NEXT:    vnclipu.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnclipu.nxv1i8.nxv1i16(
@@ -730,8 +723,7 @@ define <vscale x 2 x i8> @intrinsic_vnclipu_vx_nxv2i8_nxv2i16(<vscale x 2 x i16>
 ; CHECK-LABEL: intrinsic_vnclipu_vx_nxv2i8_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf4,ta,mu
-; CHECK-NEXT:    vnclipu.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnclipu.nxv2i8.nxv2i16(
@@ -775,8 +767,7 @@ define <vscale x 4 x i8> @intrinsic_vnclipu_vx_nxv4i8_nxv4i16(<vscale x 4 x i16>
 ; CHECK-LABEL: intrinsic_vnclipu_vx_nxv4i8_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf2,ta,mu
-; CHECK-NEXT:    vnclipu.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnclipu.nxv4i8.nxv4i16(
@@ -955,8 +946,7 @@ define <vscale x 1 x i16> @intrinsic_vnclipu_vx_nxv1i16_nxv1i32(<vscale x 1 x i3
 ; CHECK-LABEL: intrinsic_vnclipu_vx_nxv1i16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vnclipu.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnclipu.nxv1i16.nxv1i32(
@@ -1000,8 +990,7 @@ define <vscale x 2 x i16> @intrinsic_vnclipu_vx_nxv2i16_nxv2i32(<vscale x 2 x i3
 ; CHECK-LABEL: intrinsic_vnclipu_vx_nxv2i16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vnclipu.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnclipu.nxv2i16.nxv2i32(
@@ -1180,8 +1169,7 @@ define <vscale x 1 x i32> @intrinsic_vnclipu_vx_nxv1i32_nxv1i64(<vscale x 1 x i6
 ; CHECK-LABEL: intrinsic_vnclipu_vx_nxv1i32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vnclipu.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnclipu.nxv1i32.nxv1i64(
@@ -1355,8 +1343,7 @@ define <vscale x 1 x i8> @intrinsic_vnclipu_vi_nxv1i8_nxv1i16_i8(<vscale x 1 x i
 ; CHECK-LABEL: intrinsic_vnclipu_vi_nxv1i8_nxv1i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT:    vnclipu.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnclipu.nxv1i8.nxv1i16(
@@ -1388,8 +1375,7 @@ define <vscale x 2 x i8> @intrinsic_vnclipu_vi_nxv2i8_nxv2i16_i8(<vscale x 2 x i
 ; CHECK-LABEL: intrinsic_vnclipu_vi_nxv2i8_nxv2i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT:    vnclipu.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnclipu.nxv2i8.nxv2i16(
@@ -1421,8 +1407,7 @@ define <vscale x 4 x i8> @intrinsic_vnclipu_vi_nxv4i8_nxv4i16_i8(<vscale x 4 x i
 ; CHECK-LABEL: intrinsic_vnclipu_vi_nxv4i8_nxv4i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT:    vnclipu.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnclipu.nxv4i8.nxv4i16(
@@ -1553,8 +1538,7 @@ define <vscale x 1 x i16> @intrinsic_vnclipu_vi_nxv1i16_nxv1i32_i16(<vscale x 1
 ; CHECK-LABEL: intrinsic_vnclipu_vi_nxv1i16_nxv1i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vnclipu.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnclipu.nxv1i16.nxv1i32(
@@ -1586,8 +1570,7 @@ define <vscale x 2 x i16> @intrinsic_vnclipu_vi_nxv2i16_nxv2i32_i16(<vscale x 2
 ; CHECK-LABEL: intrinsic_vnclipu_vi_nxv2i16_nxv2i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vnclipu.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnclipu.nxv2i16.nxv2i32(
@@ -1718,8 +1701,7 @@ define <vscale x 1 x i32> @intrinsic_vnclipu_vi_nxv1i32_nxv1i64_i32(<vscale x 1
 ; CHECK-LABEL: intrinsic_vnclipu_vi_nxv1i32_nxv1i64_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vnclipu.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnclipu.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnclipu.nxv1i32.nxv1i64(

diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsra-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnsra-rv32.ll
index ca526fa3750e..65bdc216dfef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsra-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsra-rv32.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x i8> @intrinsic_vnsra_wv_nxv1i8_nxv1i16_nxv1i8(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv1i8_nxv1i16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsra.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnsra.nxv1i8.nxv1i16.nxv1i8(
@@ -55,8 +54,7 @@ define <vscale x 2 x i8> @intrinsic_vnsra_wv_nxv2i8_nxv2i16_nxv2i8(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv2i8_nxv2i16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsra.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnsra.nxv2i8.nxv2i16.nxv2i8(
@@ -100,8 +98,7 @@ define <vscale x 4 x i8> @intrinsic_vnsra_wv_nxv4i8_nxv4i16_nxv4i8(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv4i8_nxv4i16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnsra.nxv4i8.nxv4i16.nxv4i8(
@@ -280,8 +277,7 @@ define <vscale x 1 x i16> @intrinsic_vnsra_wv_nxv1i16_nxv1i32_nxv1i16(<vscale x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv1i16_nxv1i32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsra.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnsra.nxv1i16.nxv1i32.nxv1i16(
@@ -325,8 +321,7 @@ define <vscale x 2 x i16> @intrinsic_vnsra_wv_nxv2i16_nxv2i32_nxv2i16(<vscale x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv2i16_nxv2i32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnsra.nxv2i16.nxv2i32.nxv2i16(
@@ -505,8 +500,7 @@ define <vscale x 1 x i32> @intrinsic_vnsra_wv_nxv1i32_nxv1i64_nxv1i32(<vscale x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv1i32_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnsra.nxv1i32.nxv1i64.nxv1i32(
@@ -685,8 +679,7 @@ define <vscale x 1 x i8> @intrinsic_vnsra_vx_nxv1i8_nxv1i16(<vscale x 1 x i16> %
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv1i8_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsra.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnsra.nxv1i8.nxv1i16(
@@ -730,8 +723,7 @@ define <vscale x 2 x i8> @intrinsic_vnsra_vx_nxv2i8_nxv2i16(<vscale x 2 x i16> %
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv2i8_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsra.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnsra.nxv2i8.nxv2i16(
@@ -775,8 +767,7 @@ define <vscale x 4 x i8> @intrinsic_vnsra_vx_nxv4i8_nxv4i16(<vscale x 4 x i16> %
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv4i8_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnsra.nxv4i8.nxv4i16(
@@ -955,8 +946,7 @@ define <vscale x 1 x i16> @intrinsic_vnsra_vx_nxv1i16_nxv1i32(<vscale x 1 x i32>
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv1i16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsra.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnsra.nxv1i16.nxv1i32(
@@ -1000,8 +990,7 @@ define <vscale x 2 x i16> @intrinsic_vnsra_vx_nxv2i16_nxv2i32(<vscale x 2 x i32>
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv2i16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnsra.nxv2i16.nxv2i32(
@@ -1180,8 +1169,7 @@ define <vscale x 1 x i32> @intrinsic_vnsra_vx_nxv1i32_nxv1i64(<vscale x 1 x i64>
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv1i32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnsra.nxv1i32.nxv1i64(
@@ -1355,8 +1343,7 @@ define <vscale x 1 x i8> @intrinsic_vnsra_vi_nxv1i8_nxv1i16_i8(<vscale x 1 x i16
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv1i8_nxv1i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsra.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnsra.nxv1i8.nxv1i16(
@@ -1388,8 +1375,7 @@ define <vscale x 2 x i8> @intrinsic_vnsra_vi_nxv2i8_nxv2i16_i8(<vscale x 2 x i16
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv2i8_nxv2i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsra.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnsra.nxv2i8.nxv2i16(
@@ -1421,8 +1407,7 @@ define <vscale x 4 x i8> @intrinsic_vnsra_vi_nxv4i8_nxv4i16_i8(<vscale x 4 x i16
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv4i8_nxv4i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnsra.nxv4i8.nxv4i16(
@@ -1553,8 +1538,7 @@ define <vscale x 1 x i16> @intrinsic_vnsra_vi_nxv1i16_nxv1i32_i16(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv1i16_nxv1i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsra.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnsra.nxv1i16.nxv1i32(
@@ -1586,8 +1570,7 @@ define <vscale x 2 x i16> @intrinsic_vnsra_vi_nxv2i16_nxv2i32_i16(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv2i16_nxv2i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnsra.nxv2i16.nxv2i32(
@@ -1718,8 +1701,7 @@ define <vscale x 1 x i32> @intrinsic_vnsra_vi_nxv1i32_nxv1i64_i32(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv1i32_nxv1i64_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnsra.nxv1i32.nxv1i64(

diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsra-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vnsra-rv64.ll
index 39af759d287b..6c8f170acaa3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsra-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsra-rv64.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x i8> @intrinsic_vnsra_wv_nxv1i8_nxv1i16_nxv1i8(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv1i8_nxv1i16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsra.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnsra.nxv1i8.nxv1i16.nxv1i8(
@@ -55,8 +54,7 @@ define <vscale x 2 x i8> @intrinsic_vnsra_wv_nxv2i8_nxv2i16_nxv2i8(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv2i8_nxv2i16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsra.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnsra.nxv2i8.nxv2i16.nxv2i8(
@@ -100,8 +98,7 @@ define <vscale x 4 x i8> @intrinsic_vnsra_wv_nxv4i8_nxv4i16_nxv4i8(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv4i8_nxv4i16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnsra.nxv4i8.nxv4i16.nxv4i8(
@@ -280,8 +277,7 @@ define <vscale x 1 x i16> @intrinsic_vnsra_wv_nxv1i16_nxv1i32_nxv1i16(<vscale x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv1i16_nxv1i32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsra.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnsra.nxv1i16.nxv1i32.nxv1i16(
@@ -325,8 +321,7 @@ define <vscale x 2 x i16> @intrinsic_vnsra_wv_nxv2i16_nxv2i32_nxv2i16(<vscale x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv2i16_nxv2i32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnsra.nxv2i16.nxv2i32.nxv2i16(
@@ -505,8 +500,7 @@ define <vscale x 1 x i32> @intrinsic_vnsra_wv_nxv1i32_nxv1i64_nxv1i32(<vscale x
 ; CHECK-LABEL: intrinsic_vnsra_wv_nxv1i32_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnsra.nxv1i32.nxv1i64.nxv1i32(
@@ -685,8 +679,7 @@ define <vscale x 1 x i8> @intrinsic_vnsra_vx_nxv1i8_nxv1i16(<vscale x 1 x i16> %
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv1i8_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsra.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnsra.nxv1i8.nxv1i16(
@@ -730,8 +723,7 @@ define <vscale x 2 x i8> @intrinsic_vnsra_vx_nxv2i8_nxv2i16(<vscale x 2 x i16> %
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv2i8_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsra.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnsra.nxv2i8.nxv2i16(
@@ -775,8 +767,7 @@ define <vscale x 4 x i8> @intrinsic_vnsra_vx_nxv4i8_nxv4i16(<vscale x 4 x i16> %
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv4i8_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnsra.nxv4i8.nxv4i16(
@@ -955,8 +946,7 @@ define <vscale x 1 x i16> @intrinsic_vnsra_vx_nxv1i16_nxv1i32(<vscale x 1 x i32>
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv1i16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsra.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnsra.nxv1i16.nxv1i32(
@@ -1000,8 +990,7 @@ define <vscale x 2 x i16> @intrinsic_vnsra_vx_nxv2i16_nxv2i32(<vscale x 2 x i32>
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv2i16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnsra.nxv2i16.nxv2i32(
@@ -1180,8 +1169,7 @@ define <vscale x 1 x i32> @intrinsic_vnsra_vx_nxv1i32_nxv1i64(<vscale x 1 x i64>
 ; CHECK-LABEL: intrinsic_vnsra_vx_nxv1i32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnsra.nxv1i32.nxv1i64(
@@ -1355,8 +1343,7 @@ define <vscale x 1 x i8> @intrinsic_vnsra_vi_nxv1i8_nxv1i16_i8(<vscale x 1 x i16
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv1i8_nxv1i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsra.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnsra.nxv1i8.nxv1i16(
@@ -1388,8 +1375,7 @@ define <vscale x 2 x i8> @intrinsic_vnsra_vi_nxv2i8_nxv2i16_i8(<vscale x 2 x i16
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv2i8_nxv2i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsra.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnsra.nxv2i8.nxv2i16(
@@ -1421,8 +1407,7 @@ define <vscale x 4 x i8> @intrinsic_vnsra_vi_nxv4i8_nxv4i16_i8(<vscale x 4 x i16
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv4i8_nxv4i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnsra.nxv4i8.nxv4i16(
@@ -1553,8 +1538,7 @@ define <vscale x 1 x i16> @intrinsic_vnsra_vi_nxv1i16_nxv1i32_i16(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv1i16_nxv1i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsra.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnsra.nxv1i16.nxv1i32(
@@ -1586,8 +1570,7 @@ define <vscale x 2 x i16> @intrinsic_vnsra_vi_nxv2i16_nxv2i32_i16(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv2i16_nxv2i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnsra.nxv2i16.nxv2i32(
@@ -1718,8 +1701,7 @@ define <vscale x 1 x i32> @intrinsic_vnsra_vi_nxv1i32_nxv1i64_i32(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vnsra_vi_nxv1i32_nxv1i64_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vnsra.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsra.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnsra.nxv1i32.nxv1i64(

diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv32.ll
index 2f0d16f1100c..8ac089f7e4a6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv32.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x i8> @intrinsic_vnsrl_wv_nxv1i8_nxv1i16_nxv1i8(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv1i8_nxv1i16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsrl.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnsrl.nxv1i8.nxv1i16.nxv1i8(
@@ -55,8 +54,7 @@ define <vscale x 2 x i8> @intrinsic_vnsrl_wv_nxv2i8_nxv2i16_nxv2i8(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv2i8_nxv2i16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnsrl.nxv2i8.nxv2i16.nxv2i8(
@@ -100,8 +98,7 @@ define <vscale x 4 x i8> @intrinsic_vnsrl_wv_nxv4i8_nxv4i16_nxv4i8(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv4i8_nxv4i16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnsrl.nxv4i8.nxv4i16.nxv4i8(
@@ -280,8 +277,7 @@ define <vscale x 1 x i16> @intrinsic_vnsrl_wv_nxv1i16_nxv1i32_nxv1i16(<vscale x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv1i16_nxv1i32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnsrl.nxv1i16.nxv1i32.nxv1i16(
@@ -325,8 +321,7 @@ define <vscale x 2 x i16> @intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16(<vscale x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32.nxv2i16(
@@ -505,8 +500,7 @@ define <vscale x 1 x i32> @intrinsic_vnsrl_wv_nxv1i32_nxv1i64_nxv1i32(<vscale x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv1i32_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnsrl.nxv1i32.nxv1i64.nxv1i32(
@@ -685,8 +679,7 @@ define <vscale x 1 x i8> @intrinsic_vnsrl_vx_nxv1i8_nxv1i16(<vscale x 1 x i16> %
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv1i8_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsrl.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnsrl.nxv1i8.nxv1i16(
@@ -730,8 +723,7 @@ define <vscale x 2 x i8> @intrinsic_vnsrl_vx_nxv2i8_nxv2i16(<vscale x 2 x i16> %
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv2i8_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnsrl.nxv2i8.nxv2i16(
@@ -775,8 +767,7 @@ define <vscale x 4 x i8> @intrinsic_vnsrl_vx_nxv4i8_nxv4i16(<vscale x 4 x i16> %
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv4i8_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnsrl.nxv4i8.nxv4i16(
@@ -955,8 +946,7 @@ define <vscale x 1 x i16> @intrinsic_vnsrl_vx_nxv1i16_nxv1i32(<vscale x 1 x i32>
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv1i16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnsrl.nxv1i16.nxv1i32(
@@ -1000,8 +990,7 @@ define <vscale x 2 x i16> @intrinsic_vnsrl_vx_nxv2i16_nxv2i32(<vscale x 2 x i32>
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv2i16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32(
@@ -1180,8 +1169,7 @@ define <vscale x 1 x i32> @intrinsic_vnsrl_vx_nxv1i32_nxv1i64(<vscale x 1 x i64>
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv1i32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnsrl.nxv1i32.nxv1i64(
@@ -1355,8 +1343,7 @@ define <vscale x 1 x i8> @intrinsic_vnsrl_vi_nxv1i8_nxv1i16_i8(<vscale x 1 x i16
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv1i8_nxv1i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnsrl.nxv1i8.nxv1i16(
@@ -1388,8 +1375,7 @@ define <vscale x 2 x i8> @intrinsic_vnsrl_vi_nxv2i8_nxv2i16_i8(<vscale x 2 x i16
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv2i8_nxv2i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnsrl.nxv2i8.nxv2i16(
@@ -1421,8 +1407,7 @@ define <vscale x 4 x i8> @intrinsic_vnsrl_vi_nxv4i8_nxv4i16_i8(<vscale x 4 x i16
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv4i8_nxv4i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnsrl.nxv4i8.nxv4i16(
@@ -1553,8 +1538,7 @@ define <vscale x 1 x i16> @intrinsic_vnsrl_vi_nxv1i16_nxv1i32_i16(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv1i16_nxv1i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnsrl.nxv1i16.nxv1i32(
@@ -1586,8 +1570,7 @@ define <vscale x 2 x i16> @intrinsic_vnsrl_vi_nxv2i16_nxv2i32_i16(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv2i16_nxv2i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32(
@@ -1718,8 +1701,7 @@ define <vscale x 1 x i32> @intrinsic_vnsrl_vi_nxv1i32_nxv1i64_i32(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv1i32_nxv1i64_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnsrl.nxv1i32.nxv1i64(

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv64.ll
index ab9eefa0453f..aeb5d42e8c21 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv64.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x i8> @intrinsic_vnsrl_wv_nxv1i8_nxv1i16_nxv1i8(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv1i8_nxv1i16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsrl.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnsrl.nxv1i8.nxv1i16.nxv1i8(
@@ -55,8 +54,7 @@ define <vscale x 2 x i8> @intrinsic_vnsrl_wv_nxv2i8_nxv2i16_nxv2i8(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv2i8_nxv2i16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnsrl.nxv2i8.nxv2i16.nxv2i8(
@@ -100,8 +98,7 @@ define <vscale x 4 x i8> @intrinsic_vnsrl_wv_nxv4i8_nxv4i16_nxv4i8(<vscale x 4 x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv4i8_nxv4i16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnsrl.nxv4i8.nxv4i16.nxv4i8(
@@ -280,8 +277,7 @@ define <vscale x 1 x i16> @intrinsic_vnsrl_wv_nxv1i16_nxv1i32_nxv1i16(<vscale x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv1i16_nxv1i32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnsrl.nxv1i16.nxv1i32.nxv1i16(
@@ -325,8 +321,7 @@ define <vscale x 2 x i16> @intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16(<vscale x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32.nxv2i16(
@@ -505,8 +500,7 @@ define <vscale x 1 x i32> @intrinsic_vnsrl_wv_nxv1i32_nxv1i64_nxv1i32(<vscale x
 ; CHECK-LABEL: intrinsic_vnsrl_wv_nxv1i32_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wv v25, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnsrl.nxv1i32.nxv1i64.nxv1i32(
@@ -685,8 +679,7 @@ define <vscale x 1 x i8> @intrinsic_vnsrl_vx_nxv1i8_nxv1i16(<vscale x 1 x i16> %
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv1i8_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsrl.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnsrl.nxv1i8.nxv1i16(
@@ -730,8 +723,7 @@ define <vscale x 2 x i8> @intrinsic_vnsrl_vx_nxv2i8_nxv2i16(<vscale x 2 x i16> %
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv2i8_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnsrl.nxv2i8.nxv2i16(
@@ -775,8 +767,7 @@ define <vscale x 4 x i8> @intrinsic_vnsrl_vx_nxv4i8_nxv4i16(<vscale x 4 x i16> %
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv4i8_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnsrl.nxv4i8.nxv4i16(
@@ -955,8 +946,7 @@ define <vscale x 1 x i16> @intrinsic_vnsrl_vx_nxv1i16_nxv1i32(<vscale x 1 x i32>
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv1i16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnsrl.nxv1i16.nxv1i32(
@@ -1000,8 +990,7 @@ define <vscale x 2 x i16> @intrinsic_vnsrl_vx_nxv2i16_nxv2i32(<vscale x 2 x i32>
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv2i16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32(
@@ -1180,8 +1169,7 @@ define <vscale x 1 x i32> @intrinsic_vnsrl_vx_nxv1i32_nxv1i64(<vscale x 1 x i64>
 ; CHECK-LABEL: intrinsic_vnsrl_vx_nxv1i32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wx v25, v8, a0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wx v8, v8, a0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnsrl.nxv1i32.nxv1i64(
@@ -1355,8 +1343,7 @@ define <vscale x 1 x i8> @intrinsic_vnsrl_vi_nxv1i8_nxv1i16_i8(<vscale x 1 x i16
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv1i8_nxv1i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vnsrl.nxv1i8.nxv1i16(
@@ -1388,8 +1375,7 @@ define <vscale x 2 x i8> @intrinsic_vnsrl_vi_nxv2i8_nxv2i16_i8(<vscale x 2 x i16
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv2i8_nxv2i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vnsrl.nxv2i8.nxv2i16(
@@ -1421,8 +1407,7 @@ define <vscale x 4 x i8> @intrinsic_vnsrl_vi_nxv4i8_nxv4i16_i8(<vscale x 4 x i16
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv4i8_nxv4i16_i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vnsrl.nxv4i8.nxv4i16(
@@ -1553,8 +1538,7 @@ define <vscale x 1 x i16> @intrinsic_vnsrl_vi_nxv1i16_nxv1i32_i16(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv1i16_nxv1i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vnsrl.nxv1i16.nxv1i32(
@@ -1586,8 +1570,7 @@ define <vscale x 2 x i16> @intrinsic_vnsrl_vi_nxv2i16_nxv2i32_i16(<vscale x 2 x
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv2i16_nxv2i32_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32(
@@ -1718,8 +1701,7 @@ define <vscale x 1 x i32> @intrinsic_vnsrl_vi_nxv1i32_nxv1i64_i32(<vscale x 1 x
 ; CHECK-LABEL: intrinsic_vnsrl_vi_nxv1i32_nxv1i64_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 9
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vnsrl.nxv1i32.nxv1i64(

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode-rv32.ll
index 4b45d7430df3..b1e8f299e4e7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode-rv32.ll
@@ -5,8 +5,7 @@ define <vscale x 1 x i8> @vtrunc_nxv1i16_nxv1i8(<vscale x 1 x i16> %va) {
 ; CHECK-LABEL: vtrunc_nxv1i16_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 1 x i16> %va to <vscale x 1 x i8>
   ret <vscale x 1 x i8> %tvec
@@ -16,8 +15,7 @@ define <vscale x 2 x i8> @vtrunc_nxv2i16_nxv2i8(<vscale x 2 x i16> %va) {
 ; CHECK-LABEL: vtrunc_nxv2i16_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 2 x i16> %va to <vscale x 2 x i8>
   ret <vscale x 2 x i8> %tvec
@@ -27,8 +25,7 @@ define <vscale x 4 x i8> @vtrunc_nxv4i16_nxv4i8(<vscale x 4 x i16> %va) {
 ; CHECK-LABEL: vtrunc_nxv4i16_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 4 x i16> %va to <vscale x 4 x i8>
   ret <vscale x 4 x i8> %tvec
@@ -72,8 +69,7 @@ define <vscale x 1 x i16> @vtrunc_nxv1i32_nxv1i16(<vscale x 1 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv1i32_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 1 x i32> %va to <vscale x 1 x i16>
   ret <vscale x 1 x i16> %tvec
@@ -95,8 +91,7 @@ define <vscale x 2 x i16> @vtrunc_nxv2i32_nxv2i16(<vscale x 2 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv2i32_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 2 x i32> %va to <vscale x 2 x i16>
   ret <vscale x 2 x i16> %tvec
@@ -177,9 +172,9 @@ define <vscale x 1 x i8> @vtrunc_nxv1i64_nxv1i8(<vscale x 1 x i64> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vnsrl.wi v25, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsrl.wi v8, v26, 0
+; CHECK-NEXT:    vnsrl.wi v8, v25, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 1 x i64> %va to <vscale x 1 x i8>
   ret <vscale x 1 x i8> %tvec
@@ -201,8 +196,7 @@ define <vscale x 1 x i32> @vtrunc_nxv1i64_nxv1i32(<vscale x 1 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 1 x i64> %va to <vscale x 1 x i32>
   ret <vscale x 1 x i32> %tvec
@@ -214,9 +208,9 @@ define <vscale x 2 x i8> @vtrunc_nxv2i64_nxv2i8(<vscale x 2 x i64> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vnsrl.wi v25, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v8, v26, 0
+; CHECK-NEXT:    vnsrl.wi v8, v25, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 2 x i64> %va to <vscale x 2 x i8>
   ret <vscale x 2 x i8> %tvec

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode-rv64.ll
index e4c0191a8570..c4bc1751dfc7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode-rv64.ll
@@ -5,8 +5,7 @@ define <vscale x 1 x i8> @vtrunc_nxv1i16_nxv1i8(<vscale x 1 x i16> %va) {
 ; CHECK-LABEL: vtrunc_nxv1i16_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 1 x i16> %va to <vscale x 1 x i8>
   ret <vscale x 1 x i8> %tvec
@@ -16,8 +15,7 @@ define <vscale x 2 x i8> @vtrunc_nxv2i16_nxv2i8(<vscale x 2 x i16> %va) {
 ; CHECK-LABEL: vtrunc_nxv2i16_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 2 x i16> %va to <vscale x 2 x i8>
   ret <vscale x 2 x i8> %tvec
@@ -27,8 +25,7 @@ define <vscale x 4 x i8> @vtrunc_nxv4i16_nxv4i8(<vscale x 4 x i16> %va) {
 ; CHECK-LABEL: vtrunc_nxv4i16_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 4 x i16> %va to <vscale x 4 x i8>
   ret <vscale x 4 x i8> %tvec
@@ -72,8 +69,7 @@ define <vscale x 1 x i16> @vtrunc_nxv1i32_nxv1i16(<vscale x 1 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv1i32_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 1 x i32> %va to <vscale x 1 x i16>
   ret <vscale x 1 x i16> %tvec
@@ -95,8 +91,7 @@ define <vscale x 2 x i16> @vtrunc_nxv2i32_nxv2i16(<vscale x 2 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv2i32_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 2 x i32> %va to <vscale x 2 x i16>
   ret <vscale x 2 x i16> %tvec
@@ -177,9 +172,9 @@ define <vscale x 1 x i8> @vtrunc_nxv1i64_nxv1i8(<vscale x 1 x i64> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vnsrl.wi v25, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vnsrl.wi v8, v26, 0
+; CHECK-NEXT:    vnsrl.wi v8, v25, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 1 x i64> %va to <vscale x 1 x i8>
   ret <vscale x 1 x i8> %tvec
@@ -201,8 +196,7 @@ define <vscale x 1 x i32> @vtrunc_nxv1i64_nxv1i32(<vscale x 1 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v25, v8, 0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 1 x i64> %va to <vscale x 1 x i32>
   ret <vscale x 1 x i32> %tvec
@@ -214,9 +208,9 @@ define <vscale x 2 x i8> @vtrunc_nxv2i64_nxv2i8(<vscale x 2 x i64> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vnsrl.wi v25, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vnsrl.wi v26, v25, 0
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8,mf4,ta,mu
-; CHECK-NEXT:    vnsrl.wi v8, v26, 0
+; CHECK-NEXT:    vnsrl.wi v8, v25, 0
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 2 x i64> %va to <vscale x 2 x i8>
   ret <vscale x 2 x i8> %tvec
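
For reference, a minimal reproducer for the in-place lowering exercised by vtrunc_nxv1i64_nxv1i32 above. This is a sketch, assuming llc at this revision with -mtriple=riscv64 -mattr=+experimental-v; the function name is illustrative, not from the commit.

; The source operand is a single vector register (LMUL=1 at e64), so the
; destination (mf2 at e32) may overlap it: any overlap is necessarily in
; the lowest-numbered part of the source register group, which the spec
; permits for narrowing ops.
define <vscale x 1 x i32> @trunc_in_place(<vscale x 1 x i64> %va) {
  %t = trunc <vscale x 1 x i64> %va to <vscale x 1 x i32>
  ret <vscale x 1 x i32> %t
}

With the earlyclobber removed, this lowers to a single "vnsrl.wi v8, v8, 0" with no trailing vmv1r.v copy, as in the CHECK lines above.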
