[llvm] [RISCV] Strip W suffix from ADDIW, SRLIW, and SRAIW (PR #68425)

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 6 08:43:27 PDT 2023


https://github.com/preames created https://github.com/llvm/llvm-project/pull/68425

The original motivation of this change was simply to reduce test duplication.  As can be seen in the (massive) test delta, we have many tests whose output differs only due to the use of addi on rv32 vs addiw on rv64 when the high bits are don't-care.

However, after reading the ISA specification, I believe this to also be a compressibility optimization.  There don't seem to be compressed versions of these instructions (or of slliw, despite what the previous comment says), so using the non-W variant should allow the formation of more compressed instructions.
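To make the compressibility angle concrete, here is a small hypothetical before/after (illustrative only; the register and shift amount are arbitrary, though they mirror the alu-roundtrip.ll delta below):

    # before: sraiw has no compressed encoding, so this always occupies 4 bytes
    sraiw a0, a0, 24
    # after stripping the W suffix (legal when only the low 32 bits are consumed),
    # the assembler is free to pick the 2-byte c.srai encoding instead
    srai a0, a0, 24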

As an aside, we don't need to worry about the non-zero immediate restriction on the compressed variants because we're not directly forming the compressed variants.  If we happen to get a zero immediate for, e.g., the ADDI, then either a later optimization will strip the useless instruction or the encoder is responsible for not compressing the instruction.
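A hypothetical zero-immediate case (not taken from the test delta) to illustrate that aside:

    # suppose the transform happens to produce a zero immediate
    addi a0, a0, 0
    # c.addi requires a non-zero immediate, so the assembler just emits the
    # uncompressed 4-byte addi; alternatively, a later cleanup pass can delete
    # the no-op entirely. Either way correctness is unaffected.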

From 0606f00b16c76c059e78dc8f57c01db40dbac621 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Fri, 6 Oct 2023 07:51:03 -0700
Subject: [PATCH] [RISCV] Strip W suffix from ADDIW, SRLIW, and SRAIW

The original motivation of this change was simply to reduce test duplication.  As can be seen in the (massive) test delta, we have many tests whose output differs only due to the use of addi on rv32 vs addiw on rv64 when the high bits are don't-care.

However, after reading the ISA specification, I believe this to also be a compressibility optimization.  There don't seem to be compressed versions of these instructions (or of slliw, despite what the previous comment says), so using the non-W variant should allow the formation of more compressed instructions.

As an aside, we don't need to worry about the non-zero immediate restriction on the compressed variants because we're not directly forming the compressed variants.  If we happen to get a zero immediate for, e.g., the ADDI, then either a later optimization will strip the useless instruction or the encoder is responsible for not compressing the instruction.
---
 llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp     |   14 +-
 .../CodeGen/RISCV/GlobalISel/alu-roundtrip.ll |    4 +-
 llvm/test/CodeGen/RISCV/add-before-shl.ll     |   12 +-
 llvm/test/CodeGen/RISCV/add-imm.ll            |   20 +-
 llvm/test/CodeGen/RISCV/addimm-mulimm.ll      |   56 +-
 llvm/test/CodeGen/RISCV/and.ll                |    2 +-
 llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll     |   32 +-
 llvm/test/CodeGen/RISCV/atomic-rmw.ll         |  182 +-
 llvm/test/CodeGen/RISCV/atomic-signext.ll     |   22 +-
 .../CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll |    4 +-
 llvm/test/CodeGen/RISCV/bfloat-convert.ll     |    8 +-
 llvm/test/CodeGen/RISCV/bfloat.ll             |    8 +-
 llvm/test/CodeGen/RISCV/bittest.ll            |    8 +-
 llvm/test/CodeGen/RISCV/bswap-bitreverse.ll   |    6 +-
 llvm/test/CodeGen/RISCV/calling-conv-half.ll  |    2 +-
 llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll    |   24 +-
 .../CodeGen/RISCV/ctz_zero_return_test.ll     |   16 +-
 llvm/test/CodeGen/RISCV/div-by-constant.ll    |    4 +-
 llvm/test/CodeGen/RISCV/div-pow2.ll           |   28 +-
 llvm/test/CodeGen/RISCV/div.ll                |   20 +-
 llvm/test/CodeGen/RISCV/double-convert.ll     |   22 +-
 .../early-clobber-tied-def-subreg-liveness.ll |    2 +-
 llvm/test/CodeGen/RISCV/float-convert.ll      |   18 +-
 .../test/CodeGen/RISCV/fold-addi-loadstore.ll |    4 +-
 llvm/test/CodeGen/RISCV/half-convert.ll       |   46 +-
 .../CodeGen/RISCV/hoist-global-addr-base.ll   |   44 +-
 llvm/test/CodeGen/RISCV/iabs.ll               |    6 +-
 llvm/test/CodeGen/RISCV/imm.ll                |   20 +-
 .../RISCV/lack-of-signed-truncation-check.ll  |   36 +-
 .../CodeGen/RISCV/machine-outliner-throw.ll   |    4 +-
 llvm/test/CodeGen/RISCV/memcpy.ll             |    6 +-
 llvm/test/CodeGen/RISCV/neg-abs.ll            |   10 +-
 .../test/CodeGen/RISCV/overflow-intrinsics.ll |   12 +-
 llvm/test/CodeGen/RISCV/rem.ll                |   18 +-
 .../CodeGen/RISCV/riscv-codegenprepare-asm.ll |    8 +-
 .../test/CodeGen/RISCV/rv64i-demanded-bits.ll |    6 +-
 .../CodeGen/RISCV/rv64i-exhaustive-w-insts.ll |   16 +-
 llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll   |    4 +-
 .../RISCV/rv64i-w-insts-legalization.ll       |    6 +-
 llvm/test/CodeGen/RISCV/rv64xtheadbb.ll       |   10 +-
 llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll  |    2 +-
 llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll       |    4 +-
 llvm/test/CodeGen/RISCV/rv64zbb.ll            |   28 +-
 llvm/test/CodeGen/RISCV/rv64zbkb.ll           |    2 +-
 .../CodeGen/RISCV/rvv/bitreverse-sdnode.ll    | 1055 ++--
 llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll  | 2655 +++-----
 llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll   |  230 +-
 llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll       |  460 +-
 llvm/test/CodeGen/RISCV/rvv/combine-sats.ll   |   47 +-
 llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll    | 2237 +++----
 llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll        |  391 +-
 llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll   |  785 +--
 llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll       | 1649 ++---
 llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll    | 1919 ++----
 llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll        | 2086 ++-----
 .../CodeGen/RISCV/rvv/extractelt-int-rv64.ll  |    4 +-
 .../RISCV/rvv/fixed-vectors-bitreverse-vp.ll  | 2005 ++----
 .../RISCV/rvv/fixed-vectors-bitreverse.ll     |   42 +-
 .../RISCV/rvv/fixed-vectors-bswap-vp.ll       |  368 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-bswap.ll  |   56 +-
 .../rvv/fixed-vectors-buildvec-of-binop.ll    |    4 +-
 .../RISCV/rvv/fixed-vectors-ctlz-vp.ll        | 5504 +++++++----------
 .../CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll   |  815 +--
 .../RISCV/rvv/fixed-vectors-ctpop-vp.ll       | 1144 ++--
 .../CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll  |  661 +-
 .../RISCV/rvv/fixed-vectors-cttz-vp.ll        | 2688 +++-----
 .../CodeGen/RISCV/rvv/fixed-vectors-cttz.ll   |  689 +--
 .../RISCV/rvv/fixed-vectors-extract.ll        |    6 +-
 .../RISCV/rvv/fixed-vectors-fp-interleave.ll  |  151 +-
 .../RISCV/rvv/fixed-vectors-fp-shuffles.ll    |   74 +-
 .../RISCV/rvv/fixed-vectors-int-buildvec.ll   |  138 +-
 .../RISCV/rvv/fixed-vectors-int-interleave.ll |  213 +-
 .../RISCV/rvv/fixed-vectors-int-shuffles.ll   |  163 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-int.ll    |  987 +--
 .../rvv/fixed-vectors-interleaved-access.ll   |    4 +-
 .../RISCV/rvv/fixed-vectors-mask-buildvec.ll  |  101 +-
 .../RISCV/rvv/fixed-vectors-masked-gather.ll  |    2 +-
 .../rvv/fixed-vectors-reduction-formation.ll  |   52 +-
 .../rvv/fixed-vectors-shuffle-transpose.ll    |   89 +-
 .../rvv/fixed-vectors-shuffle-vslide1up.ll    |   31 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-store.ll  |  119 +-
 .../fixed-vectors-strided-load-store-asm.ll   |   10 +-
 .../test/CodeGen/RISCV/rvv/fold-vector-cmp.ll |    2 +-
 .../CodeGen/RISCV/rvv/fpclamptosat_vec.ll     |   36 +-
 .../RISCV/rvv/named-vector-shuffle-reverse.ll |  814 +--
 llvm/test/CodeGen/RISCV/rvv/pr61561.ll        |    2 +-
 llvm/test/CodeGen/RISCV/rvv/select-sra.ll     |    4 +-
 .../test/CodeGen/RISCV/rvv/shuffle-reverse.ll |  180 +-
 .../CodeGen/RISCV/rvv/sink-splat-operands.ll  |   44 +-
 llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll   |    8 +-
 llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll |   16 +-
 llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll    |  196 +-
 llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll   |  309 +-
 .../RISCV/rvv/vector-interleave-fixed.ll      |  115 +-
 llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll    |  232 +-
 llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll   |  341 +-
 .../CodeGen/RISCV/select-binop-identity.ll    |    4 +-
 .../test/CodeGen/RISCV/select-constant-xor.ll |    2 +-
 llvm/test/CodeGen/RISCV/select-to-and-zext.ll |    4 +-
 llvm/test/CodeGen/RISCV/select.ll             |   18 +-
 .../CodeGen/RISCV/selectcc-to-shiftand.ll     |   19 +-
 llvm/test/CodeGen/RISCV/sextw-removal.ll      |    6 +-
 llvm/test/CodeGen/RISCV/shl-demanded.ll       |    2 +-
 llvm/test/CodeGen/RISCV/shlimm-addimm.ll      |   20 +-
 .../CodeGen/RISCV/signed-truncation-check.ll  |   36 +-
 llvm/test/CodeGen/RISCV/srem-lkk.ll           |   26 +-
 .../CodeGen/RISCV/srem-seteq-illegal-types.ll |   10 +-
 llvm/test/CodeGen/RISCV/srem-vector-lkk.ll    |    4 +-
 llvm/test/CodeGen/RISCV/urem-lkk.ll           |    6 +-
 .../CodeGen/RISCV/urem-seteq-illegal-types.ll |   26 +-
 llvm/test/CodeGen/RISCV/urem-vector-lkk.ll    |    2 +-
 llvm/test/CodeGen/RISCV/vararg.ll             |    8 +-
 llvm/test/CodeGen/RISCV/xaluo.ll              |    6 +-
 113 files changed, 11518 insertions(+), 21420 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index 439a1bb6e1e69d2..3a756db977904b6 100644
--- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
+++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -12,11 +12,12 @@
 // extended bits aren't consumed or because the input was already sign extended
 // by an earlier instruction.
 //
-// Then it removes the -w suffix from addw, slliw and mulw instructions
-// whenever all users are dependent only on the lower word of the result of the
-// instruction. We do this only for addw, slliw, and mulw because the -w forms
-// are less compressible: c.add and c.slli have a larger register encoding than
-// their w counterparts, and there's no compressible version of mulw.
+// Then it removes the -w suffix from opw instructions whenever all users are
+// dependent only on the lower word of the result of the instruction.  This is
+// profitable for addw because c.add has a larger register encoding than c.addw.
+// For the remaining opw instructions, there is no compressed w variant. This
+// transform also has the side effect of making RV32 and RV64 codegen for
+// 32-bit constants match, which helps reduce check duplication in LIT tests.
 //
 //===---------------------------------------------------------------------===//
 
@@ -661,8 +662,11 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF,
       default:
         continue;
       case RISCV::ADDW:  Opc = RISCV::ADD;  break;
+      case RISCV::ADDIW: Opc = RISCV::ADDI; break;
       case RISCV::MULW:  Opc = RISCV::MUL;  break;
       case RISCV::SLLIW: Opc = RISCV::SLLI; break;
+      case RISCV::SRLIW: Opc = RISCV::SRLI; break;
+      case RISCV::SRAIW: Opc = RISCV::SRAI; break;
       }
 
       if (hasAllWUsers(MI, ST, MRI)) {
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll b/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
index c503d6541b0a577..16a81b79b2f3fc2 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
@@ -34,9 +34,9 @@ define i32 @add_i8_signext_i32(i8 %a, i8 %b) {
 ; RV64IM-LABEL: add_i8_signext_i32:
 ; RV64IM:       # %bb.0: # %entry
 ; RV64IM-NEXT:    slli a0, a0, 24
-; RV64IM-NEXT:    sraiw a0, a0, 24
+; RV64IM-NEXT:    srai a0, a0, 24
 ; RV64IM-NEXT:    slli a1, a1, 24
-; RV64IM-NEXT:    sraiw a1, a1, 24
+; RV64IM-NEXT:    srai a1, a1, 24
 ; RV64IM-NEXT:    addw a0, a0, a1
 ; RV64IM-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/add-before-shl.ll b/llvm/test/CodeGen/RISCV/add-before-shl.ll
index a41664fde38581c..274f1cef49aa955 100644
--- a/llvm/test/CodeGen/RISCV/add-before-shl.ll
+++ b/llvm/test/CodeGen/RISCV/add-before-shl.ll
@@ -25,7 +25,7 @@ define signext i32 @add_small_const(i32 signext %a) nounwind {
 ;
 ; RV64I-LABEL: add_small_const:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 1
+; RV64I-NEXT:    addi a0, a0, 1
 ; RV64I-NEXT:    slli a0, a0, 56
 ; RV64I-NEXT:    srai a0, a0, 56
 ; RV64I-NEXT:    jalr zero, 0(ra)
@@ -39,7 +39,7 @@ define signext i32 @add_small_const(i32 signext %a) nounwind {
 ;
 ; RV64C-LABEL: add_small_const:
 ; RV64C:       # %bb.0:
-; RV64C-NEXT:    c.addiw a0, 1
+; RV64C-NEXT:    c.addi a0, 1
 ; RV64C-NEXT:    c.slli a0, 56
 ; RV64C-NEXT:    c.srai a0, 56
 ; RV64C-NEXT:    c.jr ra
@@ -78,7 +78,7 @@ define signext i32 @add_large_const(i32 signext %a) nounwind {
 ; RV64C-LABEL: add_large_const:
 ; RV64C:       # %bb.0:
 ; RV64C-NEXT:    c.lui a1, 1
-; RV64C-NEXT:    c.addiw a1, -1
+; RV64C-NEXT:    c.addi a1, -1
 ; RV64C-NEXT:    c.add a0, a1
 ; RV64C-NEXT:    c.slli a0, 48
 ; RV64C-NEXT:    c.srai a0, 48
@@ -118,7 +118,7 @@ define signext i32 @add_huge_const(i32 signext %a) nounwind {
 ; RV64C-LABEL: add_huge_const:
 ; RV64C:       # %bb.0:
 ; RV64C-NEXT:    c.lui a1, 8
-; RV64C-NEXT:    c.addiw a1, -1
+; RV64C-NEXT:    c.addi a1, -1
 ; RV64C-NEXT:    c.add a0, a1
 ; RV64C-NEXT:    c.slli a0, 48
 ; RV64C-NEXT:    c.srai a0, 48
@@ -139,7 +139,7 @@ define signext i24 @add_non_machine_type(i24 signext %a) nounwind {
 ;
 ; RV64I-LABEL: add_non_machine_type:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 256
+; RV64I-NEXT:    addi a0, a0, 256
 ; RV64I-NEXT:    slli a0, a0, 52
 ; RV64I-NEXT:    srai a0, a0, 40
 ; RV64I-NEXT:    jalr zero, 0(ra)
@@ -153,7 +153,7 @@ define signext i24 @add_non_machine_type(i24 signext %a) nounwind {
 ;
 ; RV64C-LABEL: add_non_machine_type:
 ; RV64C:       # %bb.0:
-; RV64C-NEXT:    addiw a0, a0, 256
+; RV64C-NEXT:    addi a0, a0, 256
 ; RV64C-NEXT:    c.slli a0, 52
 ; RV64C-NEXT:    c.srai a0, 40
 ; RV64C-NEXT:    c.jr ra
diff --git a/llvm/test/CodeGen/RISCV/add-imm.ll b/llvm/test/CodeGen/RISCV/add-imm.ll
index 700fec0192d3e74..52751f1c224211f 100644
--- a/llvm/test/CodeGen/RISCV/add-imm.ll
+++ b/llvm/test/CodeGen/RISCV/add-imm.ll
@@ -29,7 +29,7 @@ define i32 @add_positive_low_bound_accept(i32 %a) nounwind {
 ;
 ; RV64I-LABEL: add_positive_low_bound_accept:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 2047
+; RV64I-NEXT:    addi a0, a0, 2047
 ; RV64I-NEXT:    addiw a0, a0, 1
 ; RV64I-NEXT:    ret
   %1 = add i32 %a, 2048
@@ -45,7 +45,7 @@ define i32 @add_positive_high_bound_accept(i32 %a) nounwind {
 ;
 ; RV64I-LABEL: add_positive_high_bound_accept:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 2047
+; RV64I-NEXT:    addi a0, a0, 2047
 ; RV64I-NEXT:    addiw a0, a0, 2047
 ; RV64I-NEXT:    ret
   %1 = add i32 %a, 4094
@@ -63,7 +63,7 @@ define i32 @add_positive_high_bound_reject(i32 %a) nounwind {
 ; RV64I-LABEL: add_positive_high_bound_reject:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    lui a1, 1
-; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    addi a1, a1, -1
 ; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    ret
   %1 = add i32 %a, 4095
@@ -93,7 +93,7 @@ define i32 @add_negative_high_bound_accept(i32 %a) nounwind {
 ;
 ; RV64I-LABEL: add_negative_high_bound_accept:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, -2048
+; RV64I-NEXT:    addi a0, a0, -2048
 ; RV64I-NEXT:    addiw a0, a0, -1
 ; RV64I-NEXT:    ret
   %1 = add i32 %a, -2049
@@ -109,7 +109,7 @@ define i32 @add_negative_low_bound_accept(i32 %a) nounwind {
 ;
 ; RV64I-LABEL: add_negative_low_bound_accept:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, -2048
+; RV64I-NEXT:    addi a0, a0, -2048
 ; RV64I-NEXT:    addiw a0, a0, -2048
 ; RV64I-NEXT:    ret
   %1 = add i32 %a, -4096
@@ -127,7 +127,7 @@ define i32 @add_negative_low_bound_reject(i32 %a) nounwind {
 ; RV64I-LABEL: add_negative_low_bound_reject:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    lui a1, 1048575
-; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    addi a1, a1, -1
 ; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    ret
   %1 = add i32 %a, -4097
@@ -143,7 +143,7 @@ define i32 @add32_accept(i32 %a) nounwind {
 ;
 ; RV64I-LABEL: add32_accept:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 2047
+; RV64I-NEXT:    addi a0, a0, 2047
 ; RV64I-NEXT:    addiw a0, a0, 952
 ; RV64I-NEXT:    ret
   %1 = add i32 %a, 2999
@@ -159,7 +159,7 @@ define signext i32 @add32_sext_accept(i32 signext %a) nounwind {
 ;
 ; RV64I-LABEL: add32_sext_accept:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 2047
+; RV64I-NEXT:    addi a0, a0, 2047
 ; RV64I-NEXT:    addiw a0, a0, 952
 ; RV64I-NEXT:    ret
   %1 = add i32 %a, 2999
@@ -178,7 +178,7 @@ define signext i32 @add32_sext_reject_on_rv64(i32 signext %a) nounwind {
 ;
 ; RV64I-LABEL: add32_sext_reject_on_rv64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 2047
+; RV64I-NEXT:    addi a0, a0, 2047
 ; RV64I-NEXT:    addiw a0, a0, 953
 ; RV64I-NEXT:    lui a1, %hi(gv0)
 ; RV64I-NEXT:    sw a0, %lo(gv0)(a1)
@@ -231,7 +231,7 @@ define void @add32_reject() nounwind {
 ; RV64I-NEXT:    lui a2, %hi(gb)
 ; RV64I-NEXT:    lw a3, %lo(gb)(a2)
 ; RV64I-NEXT:    lui a4, 1
-; RV64I-NEXT:    addiw a4, a4, -1096
+; RV64I-NEXT:    addi a4, a4, -1096
 ; RV64I-NEXT:    add a1, a1, a4
 ; RV64I-NEXT:    add a3, a3, a4
 ; RV64I-NEXT:    sw a1, %lo(ga)(a0)
diff --git a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
index d1bc480455dd35f..48fa69e10456563 100644
--- a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
+++ b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
@@ -84,7 +84,7 @@ define i32 @add_mul_combine_accept_b1(i32 %x) {
 ; RV64IMB-NEXT:    li a1, 23
 ; RV64IMB-NEXT:    mul a0, a0, a1
 ; RV64IMB-NEXT:    lui a1, 50
-; RV64IMB-NEXT:    addiw a1, a1, 1119
+; RV64IMB-NEXT:    addi a1, a1, 1119
 ; RV64IMB-NEXT:    addw a0, a0, a1
 ; RV64IMB-NEXT:    ret
   %tmp0 = add i32 %x, 8953
@@ -107,7 +107,7 @@ define signext i32 @add_mul_combine_accept_b2(i32 signext %x) {
 ; RV64IMB-NEXT:    li a1, 23
 ; RV64IMB-NEXT:    mul a0, a0, a1
 ; RV64IMB-NEXT:    lui a1, 50
-; RV64IMB-NEXT:    addiw a1, a1, 1119
+; RV64IMB-NEXT:    addi a1, a1, 1119
 ; RV64IMB-NEXT:    addw a0, a0, a1
 ; RV64IMB-NEXT:    ret
   %tmp0 = add i32 %x, 8953
@@ -153,7 +153,7 @@ define i32 @add_mul_combine_reject_a1(i32 %x) {
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_a1:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    addiw a0, a0, 1971
+; RV64IMB-NEXT:    addi a0, a0, 1971
 ; RV64IMB-NEXT:    li a1, 29
 ; RV64IMB-NEXT:    mulw a0, a0, a1
 ; RV64IMB-NEXT:    ret
@@ -172,7 +172,7 @@ define signext i32 @add_mul_combine_reject_a2(i32 signext %x) {
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_a2:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    addiw a0, a0, 1971
+; RV64IMB-NEXT:    addi a0, a0, 1971
 ; RV64IMB-NEXT:    li a1, 29
 ; RV64IMB-NEXT:    mulw a0, a0, a1
 ; RV64IMB-NEXT:    ret
@@ -217,7 +217,7 @@ define i32 @add_mul_combine_reject_c1(i32 %x) {
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_c1:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    addiw a0, a0, 1000
+; RV64IMB-NEXT:    addi a0, a0, 1000
 ; RV64IMB-NEXT:    sh3add a1, a0, a0
 ; RV64IMB-NEXT:    sh3add a0, a1, a0
 ; RV64IMB-NEXT:    sext.w a0, a0
@@ -237,7 +237,7 @@ define signext i32 @add_mul_combine_reject_c2(i32 signext %x) {
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_c2:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    addiw a0, a0, 1000
+; RV64IMB-NEXT:    addi a0, a0, 1000
 ; RV64IMB-NEXT:    sh3add a1, a0, a0
 ; RV64IMB-NEXT:    sh3add a0, a1, a0
 ; RV64IMB-NEXT:    sext.w a0, a0
@@ -349,7 +349,7 @@ define i32 @add_mul_combine_reject_e1(i32 %x) {
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_e1:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    addiw a0, a0, 1971
+; RV64IMB-NEXT:    addi a0, a0, 1971
 ; RV64IMB-NEXT:    li a1, 29
 ; RV64IMB-NEXT:    mulw a0, a0, a1
 ; RV64IMB-NEXT:    ret
@@ -368,7 +368,7 @@ define signext i32 @add_mul_combine_reject_e2(i32 signext %x) {
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_e2:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    addiw a0, a0, 1971
+; RV64IMB-NEXT:    addi a0, a0, 1971
 ; RV64IMB-NEXT:    li a1, 29
 ; RV64IMB-NEXT:    mulw a0, a0, a1
 ; RV64IMB-NEXT:    ret
@@ -414,7 +414,7 @@ define i32 @add_mul_combine_reject_f1(i32 %x) {
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_f1:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    addiw a0, a0, 1972
+; RV64IMB-NEXT:    addi a0, a0, 1972
 ; RV64IMB-NEXT:    li a1, 29
 ; RV64IMB-NEXT:    mul a0, a0, a1
 ; RV64IMB-NEXT:    addiw a0, a0, 11
@@ -435,7 +435,7 @@ define signext i32 @add_mul_combine_reject_f2(i32 signext %x) {
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_f2:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    addiw a0, a0, 1972
+; RV64IMB-NEXT:    addi a0, a0, 1972
 ; RV64IMB-NEXT:    li a1, 29
 ; RV64IMB-NEXT:    mul a0, a0, a1
 ; RV64IMB-NEXT:    addiw a0, a0, 11
@@ -483,7 +483,7 @@ define i32 @add_mul_combine_reject_g1(i32 %x) {
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_g1:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    addiw a0, a0, 100
+; RV64IMB-NEXT:    addi a0, a0, 100
 ; RV64IMB-NEXT:    sh3add a1, a0, a0
 ; RV64IMB-NEXT:    sh3add a0, a1, a0
 ; RV64IMB-NEXT:    addiw a0, a0, 10
@@ -504,7 +504,7 @@ define signext i32 @add_mul_combine_reject_g2(i32 signext %x) {
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_g2:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    addiw a0, a0, 100
+; RV64IMB-NEXT:    addi a0, a0, 100
 ; RV64IMB-NEXT:    sh3add a1, a0, a0
 ; RV64IMB-NEXT:    sh3add a0, a1, a0
 ; RV64IMB-NEXT:    addiw a0, a0, 10
@@ -581,9 +581,9 @@ define i32 @mul3000_add8990_a(i32 %x) {
 ;
 ; RV64IMB-LABEL: mul3000_add8990_a:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    addiw a0, a0, 3
+; RV64IMB-NEXT:    addi a0, a0, 3
 ; RV64IMB-NEXT:    lui a1, 1
-; RV64IMB-NEXT:    addiw a1, a1, -1096
+; RV64IMB-NEXT:    addi a1, a1, -1096
 ; RV64IMB-NEXT:    mul a0, a0, a1
 ; RV64IMB-NEXT:    addiw a0, a0, -10
 ; RV64IMB-NEXT:    ret
@@ -604,9 +604,9 @@ define signext i32 @mul3000_add8990_b(i32 signext %x) {
 ;
 ; RV64IMB-LABEL: mul3000_add8990_b:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    addiw a0, a0, 3
+; RV64IMB-NEXT:    addi a0, a0, 3
 ; RV64IMB-NEXT:    lui a1, 1
-; RV64IMB-NEXT:    addiw a1, a1, -1096
+; RV64IMB-NEXT:    addi a1, a1, -1096
 ; RV64IMB-NEXT:    mul a0, a0, a1
 ; RV64IMB-NEXT:    addiw a0, a0, -10
 ; RV64IMB-NEXT:    ret
@@ -656,9 +656,9 @@ define i32 @mul3000_sub8990_a(i32 %x) {
 ;
 ; RV64IMB-LABEL: mul3000_sub8990_a:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    addiw a0, a0, -3
+; RV64IMB-NEXT:    addi a0, a0, -3
 ; RV64IMB-NEXT:    lui a1, 1
-; RV64IMB-NEXT:    addiw a1, a1, -1096
+; RV64IMB-NEXT:    addi a1, a1, -1096
 ; RV64IMB-NEXT:    mul a0, a0, a1
 ; RV64IMB-NEXT:    addiw a0, a0, 10
 ; RV64IMB-NEXT:    ret
@@ -679,9 +679,9 @@ define signext i32 @mul3000_sub8990_b(i32 signext %x) {
 ;
 ; RV64IMB-LABEL: mul3000_sub8990_b:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    addiw a0, a0, -3
+; RV64IMB-NEXT:    addi a0, a0, -3
 ; RV64IMB-NEXT:    lui a1, 1
-; RV64IMB-NEXT:    addiw a1, a1, -1096
+; RV64IMB-NEXT:    addi a1, a1, -1096
 ; RV64IMB-NEXT:    mul a0, a0, a1
 ; RV64IMB-NEXT:    addiw a0, a0, 10
 ; RV64IMB-NEXT:    ret
@@ -732,9 +732,9 @@ define i32 @mulneg3000_add8990_a(i32 %x) {
 ;
 ; RV64IMB-LABEL: mulneg3000_add8990_a:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    addiw a0, a0, -3
+; RV64IMB-NEXT:    addi a0, a0, -3
 ; RV64IMB-NEXT:    lui a1, 1048575
-; RV64IMB-NEXT:    addiw a1, a1, 1096
+; RV64IMB-NEXT:    addi a1, a1, 1096
 ; RV64IMB-NEXT:    mul a0, a0, a1
 ; RV64IMB-NEXT:    addiw a0, a0, -10
 ; RV64IMB-NEXT:    ret
@@ -755,9 +755,9 @@ define signext i32 @mulneg3000_add8990_b(i32 signext %x) {
 ;
 ; RV64IMB-LABEL: mulneg3000_add8990_b:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    addiw a0, a0, -3
+; RV64IMB-NEXT:    addi a0, a0, -3
 ; RV64IMB-NEXT:    lui a1, 1048575
-; RV64IMB-NEXT:    addiw a1, a1, 1096
+; RV64IMB-NEXT:    addi a1, a1, 1096
 ; RV64IMB-NEXT:    mul a0, a0, a1
 ; RV64IMB-NEXT:    addiw a0, a0, -10
 ; RV64IMB-NEXT:    ret
@@ -808,9 +808,9 @@ define i32 @mulneg3000_sub8990_a(i32 %x) {
 ;
 ; RV64IMB-LABEL: mulneg3000_sub8990_a:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    addiw a0, a0, 3
+; RV64IMB-NEXT:    addi a0, a0, 3
 ; RV64IMB-NEXT:    lui a1, 1048575
-; RV64IMB-NEXT:    addiw a1, a1, 1096
+; RV64IMB-NEXT:    addi a1, a1, 1096
 ; RV64IMB-NEXT:    mul a0, a0, a1
 ; RV64IMB-NEXT:    addiw a0, a0, 10
 ; RV64IMB-NEXT:    ret
@@ -831,9 +831,9 @@ define signext i32 @mulneg3000_sub8990_b(i32 signext %x) {
 ;
 ; RV64IMB-LABEL: mulneg3000_sub8990_b:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    addiw a0, a0, 3
+; RV64IMB-NEXT:    addi a0, a0, 3
 ; RV64IMB-NEXT:    lui a1, 1048575
-; RV64IMB-NEXT:    addiw a1, a1, 1096
+; RV64IMB-NEXT:    addi a1, a1, 1096
 ; RV64IMB-NEXT:    mul a0, a0, a1
 ; RV64IMB-NEXT:    addiw a0, a0, 10
 ; RV64IMB-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/and.ll b/llvm/test/CodeGen/RISCV/and.ll
index 5eff422013da6a8..79e3b954c50d8d8 100644
--- a/llvm/test/CodeGen/RISCV/and.ll
+++ b/llvm/test/CodeGen/RISCV/and.ll
@@ -195,7 +195,7 @@ define i64 @and64_0x7fffffff00000000(i64 %x) {
 ; RV64I-LABEL: and64_0x7fffffff00000000:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    lui a1, 524288
-; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    addi a1, a1, -1
 ; RV64I-NEXT:    slli a1, a1, 32
 ; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
index f900b5161f75128..eea4cb72938af23 100644
--- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
@@ -1104,7 +1104,7 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw
 ; RV64IA-NEXT:    andi a3, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
+; RV64IA-NEXT:    addi a4, a4, -1
 ; RV64IA-NEXT:    sllw a5, a4, a0
 ; RV64IA-NEXT:    and a1, a1, a4
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -1206,7 +1206,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV64IA-WMO-NEXT:    andi a3, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a4, 16
-; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    addi a4, a4, -1
 ; RV64IA-WMO-NEXT:    sllw a5, a4, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a4
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -1230,7 +1230,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV64IA-TSO-NEXT:    andi a3, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a4, 16
-; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    addi a4, a4, -1
 ; RV64IA-TSO-NEXT:    sllw a5, a4, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a4
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -1332,7 +1332,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV64IA-WMO-NEXT:    andi a3, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a4, 16
-; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    addi a4, a4, -1
 ; RV64IA-WMO-NEXT:    sllw a5, a4, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a4
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -1356,7 +1356,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV64IA-TSO-NEXT:    andi a3, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a4, 16
-; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    addi a4, a4, -1
 ; RV64IA-TSO-NEXT:    sllw a5, a4, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a4
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -1458,7 +1458,7 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV64IA-WMO-NEXT:    andi a3, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a4, 16
-; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    addi a4, a4, -1
 ; RV64IA-WMO-NEXT:    sllw a5, a4, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a4
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -1482,7 +1482,7 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV64IA-TSO-NEXT:    andi a3, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a4, 16
-; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    addi a4, a4, -1
 ; RV64IA-TSO-NEXT:    sllw a5, a4, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a4
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -1584,7 +1584,7 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV64IA-WMO-NEXT:    andi a3, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a4, 16
-; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    addi a4, a4, -1
 ; RV64IA-WMO-NEXT:    sllw a5, a4, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a4
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -1608,7 +1608,7 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV64IA-TSO-NEXT:    andi a3, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a4, 16
-; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    addi a4, a4, -1
 ; RV64IA-TSO-NEXT:    sllw a5, a4, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a4
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -1710,7 +1710,7 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV64IA-WMO-NEXT:    andi a3, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a4, 16
-; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    addi a4, a4, -1
 ; RV64IA-WMO-NEXT:    sllw a5, a4, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a4
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -1734,7 +1734,7 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV64IA-TSO-NEXT:    andi a3, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a4, 16
-; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    addi a4, a4, -1
 ; RV64IA-TSO-NEXT:    sllw a5, a4, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a4
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -1836,7 +1836,7 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV64IA-WMO-NEXT:    andi a3, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a4, 16
-; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    addi a4, a4, -1
 ; RV64IA-WMO-NEXT:    sllw a5, a4, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a4
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -1860,7 +1860,7 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV64IA-TSO-NEXT:    andi a3, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a4, 16
-; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    addi a4, a4, -1
 ; RV64IA-TSO-NEXT:    sllw a5, a4, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a4
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -1938,7 +1938,7 @@ define void @cmpxchg_i16_seq_cst_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; RV64IA-NEXT:    andi a3, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
+; RV64IA-NEXT:    addi a4, a4, -1
 ; RV64IA-NEXT:    sllw a5, a4, a0
 ; RV64IA-NEXT:    and a1, a1, a4
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -2016,7 +2016,7 @@ define void @cmpxchg_i16_seq_cst_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV64IA-NEXT:    andi a3, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
+; RV64IA-NEXT:    addi a4, a4, -1
 ; RV64IA-NEXT:    sllw a5, a4, a0
 ; RV64IA-NEXT:    and a1, a1, a4
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -2094,7 +2094,7 @@ define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; RV64IA-NEXT:    andi a3, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
+; RV64IA-NEXT:    addi a4, a4, -1
 ; RV64IA-NEXT:    sllw a5, a4, a0
 ; RV64IA-NEXT:    and a1, a1, a4
 ; RV64IA-NEXT:    sllw a1, a1, a0
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
index c4f224dcba1b21e..e97a1ea5dfca009 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
@@ -6864,7 +6864,7 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -6951,7 +6951,7 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -6972,7 +6972,7 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -7059,7 +7059,7 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -7080,7 +7080,7 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -7167,7 +7167,7 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -7188,7 +7188,7 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -7254,7 +7254,7 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -7316,7 +7316,7 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
 ; RV64IA-NEXT:    andi a1, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a2, 16
-; RV64IA-NEXT:    addiw a2, a2, -1
+; RV64IA-NEXT:    addi a2, a2, -1
 ; RV64IA-NEXT:    sllw a2, a2, a0
 ; RV64IA-NEXT:    not a2, a2
 ; RV64IA-NEXT:    amoand.w a1, a2, (a1)
@@ -7378,7 +7378,7 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
 ; RV64IA-WMO-NEXT:    andi a1, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a2, 16
-; RV64IA-WMO-NEXT:    addiw a2, a2, -1
+; RV64IA-WMO-NEXT:    addi a2, a2, -1
 ; RV64IA-WMO-NEXT:    sllw a2, a2, a0
 ; RV64IA-WMO-NEXT:    not a2, a2
 ; RV64IA-WMO-NEXT:    amoand.w.aq a1, a2, (a1)
@@ -7390,7 +7390,7 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
 ; RV64IA-TSO-NEXT:    andi a1, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a2, 16
-; RV64IA-TSO-NEXT:    addiw a2, a2, -1
+; RV64IA-TSO-NEXT:    addi a2, a2, -1
 ; RV64IA-TSO-NEXT:    sllw a2, a2, a0
 ; RV64IA-TSO-NEXT:    not a2, a2
 ; RV64IA-TSO-NEXT:    amoand.w a1, a2, (a1)
@@ -7452,7 +7452,7 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
 ; RV64IA-WMO-NEXT:    andi a1, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a2, 16
-; RV64IA-WMO-NEXT:    addiw a2, a2, -1
+; RV64IA-WMO-NEXT:    addi a2, a2, -1
 ; RV64IA-WMO-NEXT:    sllw a2, a2, a0
 ; RV64IA-WMO-NEXT:    not a2, a2
 ; RV64IA-WMO-NEXT:    amoand.w.rl a1, a2, (a1)
@@ -7464,7 +7464,7 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
 ; RV64IA-TSO-NEXT:    andi a1, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a2, 16
-; RV64IA-TSO-NEXT:    addiw a2, a2, -1
+; RV64IA-TSO-NEXT:    addi a2, a2, -1
 ; RV64IA-TSO-NEXT:    sllw a2, a2, a0
 ; RV64IA-TSO-NEXT:    not a2, a2
 ; RV64IA-TSO-NEXT:    amoand.w a1, a2, (a1)
@@ -7526,7 +7526,7 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
 ; RV64IA-WMO-NEXT:    andi a1, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a2, 16
-; RV64IA-WMO-NEXT:    addiw a2, a2, -1
+; RV64IA-WMO-NEXT:    addi a2, a2, -1
 ; RV64IA-WMO-NEXT:    sllw a2, a2, a0
 ; RV64IA-WMO-NEXT:    not a2, a2
 ; RV64IA-WMO-NEXT:    amoand.w.aqrl a1, a2, (a1)
@@ -7538,7 +7538,7 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
 ; RV64IA-TSO-NEXT:    andi a1, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a2, 16
-; RV64IA-TSO-NEXT:    addiw a2, a2, -1
+; RV64IA-TSO-NEXT:    addi a2, a2, -1
 ; RV64IA-TSO-NEXT:    sllw a2, a2, a0
 ; RV64IA-TSO-NEXT:    not a2, a2
 ; RV64IA-TSO-NEXT:    amoand.w a1, a2, (a1)
@@ -7600,7 +7600,7 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
 ; RV64IA-WMO-NEXT:    andi a1, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a2, 16
-; RV64IA-WMO-NEXT:    addiw a2, a2, -1
+; RV64IA-WMO-NEXT:    addi a2, a2, -1
 ; RV64IA-WMO-NEXT:    sllw a2, a2, a0
 ; RV64IA-WMO-NEXT:    not a2, a2
 ; RV64IA-WMO-NEXT:    amoand.w.aqrl a1, a2, (a1)
@@ -7612,7 +7612,7 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-NEXT:    andi a1, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a2, 16
-; RV64IA-TSO-NEXT:    addiw a2, a2, -1
+; RV64IA-TSO-NEXT:    addi a2, a2, -1
 ; RV64IA-TSO-NEXT:    sllw a2, a2, a0
 ; RV64IA-TSO-NEXT:    not a2, a2
 ; RV64IA-TSO-NEXT:    amoand.w a1, a2, (a1)
@@ -7663,7 +7663,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind {
 ; RV64IA-NEXT:    andi a1, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a2, 16
-; RV64IA-NEXT:    addiw a2, a2, -1
+; RV64IA-NEXT:    addi a2, a2, -1
 ; RV64IA-NEXT:    sllw a2, a2, a0
 ; RV64IA-NEXT:    amoor.w a1, a2, (a1)
 ; RV64IA-NEXT:    srlw a0, a1, a0
@@ -7724,7 +7724,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
 ; RV64IA-WMO-NEXT:    andi a1, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a2, 16
-; RV64IA-WMO-NEXT:    addiw a2, a2, -1
+; RV64IA-WMO-NEXT:    addi a2, a2, -1
 ; RV64IA-WMO-NEXT:    sllw a2, a2, a0
 ; RV64IA-WMO-NEXT:    amoor.w.aq a1, a2, (a1)
 ; RV64IA-WMO-NEXT:    srlw a0, a1, a0
@@ -7735,7 +7735,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
 ; RV64IA-TSO-NEXT:    andi a1, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a2, 16
-; RV64IA-TSO-NEXT:    addiw a2, a2, -1
+; RV64IA-TSO-NEXT:    addi a2, a2, -1
 ; RV64IA-TSO-NEXT:    sllw a2, a2, a0
 ; RV64IA-TSO-NEXT:    amoor.w a1, a2, (a1)
 ; RV64IA-TSO-NEXT:    srlw a0, a1, a0
@@ -7796,7 +7796,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
 ; RV64IA-WMO-NEXT:    andi a1, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a2, 16
-; RV64IA-WMO-NEXT:    addiw a2, a2, -1
+; RV64IA-WMO-NEXT:    addi a2, a2, -1
 ; RV64IA-WMO-NEXT:    sllw a2, a2, a0
 ; RV64IA-WMO-NEXT:    amoor.w.rl a1, a2, (a1)
 ; RV64IA-WMO-NEXT:    srlw a0, a1, a0
@@ -7807,7 +7807,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
 ; RV64IA-TSO-NEXT:    andi a1, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a2, 16
-; RV64IA-TSO-NEXT:    addiw a2, a2, -1
+; RV64IA-TSO-NEXT:    addi a2, a2, -1
 ; RV64IA-TSO-NEXT:    sllw a2, a2, a0
 ; RV64IA-TSO-NEXT:    amoor.w a1, a2, (a1)
 ; RV64IA-TSO-NEXT:    srlw a0, a1, a0
@@ -7868,7 +7868,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
 ; RV64IA-WMO-NEXT:    andi a1, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a2, 16
-; RV64IA-WMO-NEXT:    addiw a2, a2, -1
+; RV64IA-WMO-NEXT:    addi a2, a2, -1
 ; RV64IA-WMO-NEXT:    sllw a2, a2, a0
 ; RV64IA-WMO-NEXT:    amoor.w.aqrl a1, a2, (a1)
 ; RV64IA-WMO-NEXT:    srlw a0, a1, a0
@@ -7879,7 +7879,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
 ; RV64IA-TSO-NEXT:    andi a1, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a2, 16
-; RV64IA-TSO-NEXT:    addiw a2, a2, -1
+; RV64IA-TSO-NEXT:    addi a2, a2, -1
 ; RV64IA-TSO-NEXT:    sllw a2, a2, a0
 ; RV64IA-TSO-NEXT:    amoor.w a1, a2, (a1)
 ; RV64IA-TSO-NEXT:    srlw a0, a1, a0
@@ -7940,7 +7940,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
 ; RV64IA-WMO-NEXT:    andi a1, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a2, 16
-; RV64IA-WMO-NEXT:    addiw a2, a2, -1
+; RV64IA-WMO-NEXT:    addi a2, a2, -1
 ; RV64IA-WMO-NEXT:    sllw a2, a2, a0
 ; RV64IA-WMO-NEXT:    amoor.w.aqrl a1, a2, (a1)
 ; RV64IA-WMO-NEXT:    srlw a0, a1, a0
@@ -7951,7 +7951,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-NEXT:    andi a1, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a2, 16
-; RV64IA-TSO-NEXT:    addiw a2, a2, -1
+; RV64IA-TSO-NEXT:    addi a2, a2, -1
 ; RV64IA-TSO-NEXT:    sllw a2, a2, a0
 ; RV64IA-TSO-NEXT:    amoor.w a1, a2, (a1)
 ; RV64IA-TSO-NEXT:    srlw a0, a1, a0
@@ -8007,7 +8007,7 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -8094,7 +8094,7 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -8115,7 +8115,7 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -8202,7 +8202,7 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -8223,7 +8223,7 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -8310,7 +8310,7 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -8331,7 +8331,7 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -8397,7 +8397,7 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -8463,7 +8463,7 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -8550,7 +8550,7 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -8571,7 +8571,7 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -8658,7 +8658,7 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -8679,7 +8679,7 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -8766,7 +8766,7 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -8787,7 +8787,7 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -8853,7 +8853,7 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -8913,7 +8913,7 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    not a4, a4
 ; RV64IA-NEXT:    and a1, a1, a3
@@ -8982,7 +8982,7 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    not a4, a4
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
@@ -8997,7 +8997,7 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    not a4, a4
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
@@ -9066,7 +9066,7 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    not a4, a4
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
@@ -9081,7 +9081,7 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    not a4, a4
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
@@ -9150,7 +9150,7 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    not a4, a4
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
@@ -9165,7 +9165,7 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    not a4, a4
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
@@ -9234,7 +9234,7 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    not a4, a4
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
@@ -9249,7 +9249,7 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    not a4, a4
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
@@ -9310,7 +9310,7 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -9400,7 +9400,7 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -9422,7 +9422,7 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -9512,7 +9512,7 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -9534,7 +9534,7 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -9624,7 +9624,7 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -9646,7 +9646,7 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -9714,7 +9714,7 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -10492,7 +10492,7 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
+; RV64IA-NEXT:    addi a4, a4, -1
 ; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 48
 ; RV64IA-NEXT:    srai a1, a1, 48
@@ -10673,7 +10673,7 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    andi a3, a0, 24
 ; RV64IA-WMO-NEXT:    lui a4, 16
-; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    addi a4, a4, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a4, a0
 ; RV64IA-WMO-NEXT:    slli a1, a1, 48
 ; RV64IA-WMO-NEXT:    srai a1, a1, 48
@@ -10704,7 +10704,7 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    andi a3, a0, 24
 ; RV64IA-TSO-NEXT:    lui a4, 16
-; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    addi a4, a4, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a4, a0
 ; RV64IA-TSO-NEXT:    slli a1, a1, 48
 ; RV64IA-TSO-NEXT:    srai a1, a1, 48
@@ -10885,7 +10885,7 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    andi a3, a0, 24
 ; RV64IA-WMO-NEXT:    lui a4, 16
-; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    addi a4, a4, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a4, a0
 ; RV64IA-WMO-NEXT:    slli a1, a1, 48
 ; RV64IA-WMO-NEXT:    srai a1, a1, 48
@@ -10916,7 +10916,7 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    andi a3, a0, 24
 ; RV64IA-TSO-NEXT:    lui a4, 16
-; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    addi a4, a4, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a4, a0
 ; RV64IA-TSO-NEXT:    slli a1, a1, 48
 ; RV64IA-TSO-NEXT:    srai a1, a1, 48
@@ -11097,7 +11097,7 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    andi a3, a0, 24
 ; RV64IA-WMO-NEXT:    lui a4, 16
-; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    addi a4, a4, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a4, a0
 ; RV64IA-WMO-NEXT:    slli a1, a1, 48
 ; RV64IA-WMO-NEXT:    srai a1, a1, 48
@@ -11128,7 +11128,7 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    andi a3, a0, 24
 ; RV64IA-TSO-NEXT:    lui a4, 16
-; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    addi a4, a4, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a4, a0
 ; RV64IA-TSO-NEXT:    slli a1, a1, 48
 ; RV64IA-TSO-NEXT:    srai a1, a1, 48
@@ -11278,7 +11278,7 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
+; RV64IA-NEXT:    addi a4, a4, -1
 ; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 48
 ; RV64IA-NEXT:    srai a1, a1, 48
@@ -11428,7 +11428,7 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
+; RV64IA-NEXT:    addi a4, a4, -1
 ; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 48
 ; RV64IA-NEXT:    srai a1, a1, 48
@@ -11609,7 +11609,7 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    andi a3, a0, 24
 ; RV64IA-WMO-NEXT:    lui a4, 16
-; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    addi a4, a4, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a4, a0
 ; RV64IA-WMO-NEXT:    slli a1, a1, 48
 ; RV64IA-WMO-NEXT:    srai a1, a1, 48
@@ -11640,7 +11640,7 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    andi a3, a0, 24
 ; RV64IA-TSO-NEXT:    lui a4, 16
-; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    addi a4, a4, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a4, a0
 ; RV64IA-TSO-NEXT:    slli a1, a1, 48
 ; RV64IA-TSO-NEXT:    srai a1, a1, 48
@@ -11821,7 +11821,7 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    andi a3, a0, 24
 ; RV64IA-WMO-NEXT:    lui a4, 16
-; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    addi a4, a4, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a4, a0
 ; RV64IA-WMO-NEXT:    slli a1, a1, 48
 ; RV64IA-WMO-NEXT:    srai a1, a1, 48
@@ -11852,7 +11852,7 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    andi a3, a0, 24
 ; RV64IA-TSO-NEXT:    lui a4, 16
-; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    addi a4, a4, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a4, a0
 ; RV64IA-TSO-NEXT:    slli a1, a1, 48
 ; RV64IA-TSO-NEXT:    srai a1, a1, 48
@@ -12033,7 +12033,7 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    andi a3, a0, 24
 ; RV64IA-WMO-NEXT:    lui a4, 16
-; RV64IA-WMO-NEXT:    addiw a4, a4, -1
+; RV64IA-WMO-NEXT:    addi a4, a4, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a4, a0
 ; RV64IA-WMO-NEXT:    slli a1, a1, 48
 ; RV64IA-WMO-NEXT:    srai a1, a1, 48
@@ -12064,7 +12064,7 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    andi a3, a0, 24
 ; RV64IA-TSO-NEXT:    lui a4, 16
-; RV64IA-TSO-NEXT:    addiw a4, a4, -1
+; RV64IA-TSO-NEXT:    addi a4, a4, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a4, a0
 ; RV64IA-TSO-NEXT:    slli a1, a1, 48
 ; RV64IA-TSO-NEXT:    srai a1, a1, 48
@@ -12214,7 +12214,7 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
+; RV64IA-NEXT:    addi a4, a4, -1
 ; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 48
 ; RV64IA-NEXT:    srai a1, a1, 48
@@ -12361,7 +12361,7 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -12528,7 +12528,7 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -12553,7 +12553,7 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -12720,7 +12720,7 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -12745,7 +12745,7 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -12912,7 +12912,7 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -12937,7 +12937,7 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -13079,7 +13079,7 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -13221,7 +13221,7 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -13388,7 +13388,7 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -13413,7 +13413,7 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -13580,7 +13580,7 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -13605,7 +13605,7 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -13772,7 +13772,7 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-WMO-NEXT:    andi a2, a0, -4
 ; RV64IA-WMO-NEXT:    slli a0, a0, 3
 ; RV64IA-WMO-NEXT:    lui a3, 16
-; RV64IA-WMO-NEXT:    addiw a3, a3, -1
+; RV64IA-WMO-NEXT:    addi a3, a3, -1
 ; RV64IA-WMO-NEXT:    sllw a4, a3, a0
 ; RV64IA-WMO-NEXT:    and a1, a1, a3
 ; RV64IA-WMO-NEXT:    sllw a1, a1, a0
@@ -13797,7 +13797,7 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NEXT:    andi a2, a0, -4
 ; RV64IA-TSO-NEXT:    slli a0, a0, 3
 ; RV64IA-TSO-NEXT:    lui a3, 16
-; RV64IA-TSO-NEXT:    addiw a3, a3, -1
+; RV64IA-TSO-NEXT:    addi a3, a3, -1
 ; RV64IA-TSO-NEXT:    sllw a4, a3, a0
 ; RV64IA-TSO-NEXT:    and a1, a1, a3
 ; RV64IA-TSO-NEXT:    sllw a1, a1, a0
@@ -13939,7 +13939,7 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll
index c08f045ee8abc0a..bd945c865c359d5 100644
--- a/llvm/test/CodeGen/RISCV/atomic-signext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll
@@ -1207,7 +1207,7 @@ define signext i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -1281,7 +1281,7 @@ define signext i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -1355,7 +1355,7 @@ define signext i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -1423,7 +1423,7 @@ define signext i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    not a4, a4
 ; RV64IA-NEXT:    and a1, a1, a3
@@ -1492,7 +1492,7 @@ define signext i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -1748,7 +1748,7 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
+; RV64IA-NEXT:    addi a4, a4, -1
 ; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 48
 ; RV64IA-NEXT:    srai a1, a1, 48
@@ -1904,7 +1904,7 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
+; RV64IA-NEXT:    addi a4, a4, -1
 ; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 48
 ; RV64IA-NEXT:    srai a1, a1, 48
@@ -2057,7 +2057,7 @@ define signext i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -2205,7 +2205,7 @@ define signext i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
-; RV64IA-NEXT:    addiw a3, a3, -1
+; RV64IA-NEXT:    addi a3, a3, -1
 ; RV64IA-NEXT:    sllw a4, a3, a0
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -3969,7 +3969,7 @@ define signext i16 @cmpxchg_i16_monotonic_monotonic_val0(ptr %ptr, i16 signext %
 ; RV64IA-NEXT:    andi a3, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
+; RV64IA-NEXT:    addi a4, a4, -1
 ; RV64IA-NEXT:    sllw a5, a4, a0
 ; RV64IA-NEXT:    and a1, a1, a4
 ; RV64IA-NEXT:    sllw a1, a1, a0
@@ -4054,7 +4054,7 @@ define i1 @cmpxchg_i16_monotonic_monotonic_val1(ptr %ptr, i16 signext %cmp, i16
 ; RV64IA-NEXT:    andi a3, a0, -4
 ; RV64IA-NEXT:    slli a0, a0, 3
 ; RV64IA-NEXT:    lui a4, 16
-; RV64IA-NEXT:    addiw a4, a4, -1
+; RV64IA-NEXT:    addi a4, a4, -1
 ; RV64IA-NEXT:    sllw a5, a4, a0
 ; RV64IA-NEXT:    and a1, a1, a4
 ; RV64IA-NEXT:    sllw a1, a1, a0
diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
index 4d75a74f06ac214..5f15a9c0671021b 100644
--- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
@@ -140,7 +140,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
 ; RV64IA-NEXT:    srlw a5, a3, a0
 ; RV64IA-NEXT:    sext.w a6, a3
 ; RV64IA-NEXT:    andi a7, a5, 255
-; RV64IA-NEXT:    addiw a5, a5, 1
+; RV64IA-NEXT:    addi a5, a5, 1
 ; RV64IA-NEXT:    sltu a7, a7, a1
 ; RV64IA-NEXT:    negw a7, a7
 ; RV64IA-NEXT:    and a5, a7, a5
@@ -304,7 +304,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
 ; RV64IA-NEXT:    srlw a6, a4, a0
 ; RV64IA-NEXT:    sext.w a7, a4
 ; RV64IA-NEXT:    and t0, a6, a3
-; RV64IA-NEXT:    addiw a6, a6, 1
+; RV64IA-NEXT:    addi a6, a6, 1
 ; RV64IA-NEXT:    sltu t0, t0, a1
 ; RV64IA-NEXT:    negw t0, t0
 ; RV64IA-NEXT:    and a6, a6, a3
diff --git a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
index b68f74a1f7c3a72..8a0c4240d161bfb 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
@@ -404,7 +404,7 @@ define i32 @fcvt_wu_bf16_sat(bfloat %a) nounwind {
 ; CHECK64ZFBFMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; CHECK64ZFBFMIN-NEXT:    feq.s a1, fa5, fa5
 ; CHECK64ZFBFMIN-NEXT:    seqz a1, a1
-; CHECK64ZFBFMIN-NEXT:    addiw a1, a1, -1
+; CHECK64ZFBFMIN-NEXT:    addi a1, a1, -1
 ; CHECK64ZFBFMIN-NEXT:    and a0, a0, a1
 ; CHECK64ZFBFMIN-NEXT:    slli a0, a0, 32
 ; CHECK64ZFBFMIN-NEXT:    srli a0, a0, 32
@@ -420,7 +420,7 @@ define i32 @fcvt_wu_bf16_sat(bfloat %a) nounwind {
 ; RV64ID-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; RV64ID-NEXT:    feq.s a1, fa5, fa5
 ; RV64ID-NEXT:    seqz a1, a1
-; RV64ID-NEXT:    addiw a1, a1, -1
+; RV64ID-NEXT:    addi a1, a1, -1
 ; RV64ID-NEXT:    and a0, a0, a1
 ; RV64ID-NEXT:    slli a0, a0, 32
 ; RV64ID-NEXT:    srli a0, a0, 32
@@ -1722,7 +1722,7 @@ define zeroext i32 @fcvt_wu_bf16_sat_zext(bfloat %a) nounwind {
 ; CHECK64ZFBFMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; CHECK64ZFBFMIN-NEXT:    feq.s a1, fa5, fa5
 ; CHECK64ZFBFMIN-NEXT:    seqz a1, a1
-; CHECK64ZFBFMIN-NEXT:    addiw a1, a1, -1
+; CHECK64ZFBFMIN-NEXT:    addi a1, a1, -1
 ; CHECK64ZFBFMIN-NEXT:    and a0, a0, a1
 ; CHECK64ZFBFMIN-NEXT:    slli a0, a0, 32
 ; CHECK64ZFBFMIN-NEXT:    srli a0, a0, 32
@@ -1738,7 +1738,7 @@ define zeroext i32 @fcvt_wu_bf16_sat_zext(bfloat %a) nounwind {
 ; RV64ID-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; RV64ID-NEXT:    feq.s a1, fa5, fa5
 ; RV64ID-NEXT:    seqz a1, a1
-; RV64ID-NEXT:    addiw a1, a1, -1
+; RV64ID-NEXT:    addi a1, a1, -1
 ; RV64ID-NEXT:    and a0, a0, a1
 ; RV64ID-NEXT:    slli a0, a0, 32
 ; RV64ID-NEXT:    srli a0, a0, 32
diff --git a/llvm/test/CodeGen/RISCV/bfloat.ll b/llvm/test/CodeGen/RISCV/bfloat.ll
index c95d61fd6baab7e..5013f76f9b0b33a 100644
--- a/llvm/test/CodeGen/RISCV/bfloat.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat.ll
@@ -367,7 +367,7 @@ define bfloat @bfloat_add(bfloat %a, bfloat %b) nounwind {
 ; RV64ID-LP64-NEXT:    addi sp, sp, -16
 ; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64ID-LP64-NEXT:    lui a2, 16
-; RV64ID-LP64-NEXT:    addiw a2, a2, -1
+; RV64ID-LP64-NEXT:    addi a2, a2, -1
 ; RV64ID-LP64-NEXT:    and a0, a0, a2
 ; RV64ID-LP64-NEXT:    and a1, a1, a2
 ; RV64ID-LP64-NEXT:    slli a1, a1, 16
@@ -409,7 +409,7 @@ define bfloat @bfloat_add(bfloat %a, bfloat %b) nounwind {
 ; RV64ID-LP64D-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
 ; RV64ID-LP64D-NEXT:    lui a1, 16
-; RV64ID-LP64D-NEXT:    addiw a1, a1, -1
+; RV64ID-LP64D-NEXT:    addi a1, a1, -1
 ; RV64ID-LP64D-NEXT:    and a0, a0, a1
 ; RV64ID-LP64D-NEXT:    fmv.x.w a2, fa1
 ; RV64ID-LP64D-NEXT:    and a1, a2, a1
@@ -605,7 +605,7 @@ define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind {
 ; RV64ID-LP64-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
 ; RV64ID-LP64-NEXT:    mv s0, a0
 ; RV64ID-LP64-NEXT:    lui a0, 16
-; RV64ID-LP64-NEXT:    addiw a0, a0, -1
+; RV64ID-LP64-NEXT:    addi a0, a0, -1
 ; RV64ID-LP64-NEXT:    and a1, a1, a0
 ; RV64ID-LP64-NEXT:    and a0, a2, a0
 ; RV64ID-LP64-NEXT:    slli a0, a0, 16
@@ -652,7 +652,7 @@ define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind {
 ; RV64ID-LP64D-NEXT:    mv s0, a0
 ; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
 ; RV64ID-LP64D-NEXT:    lui a1, 16
-; RV64ID-LP64D-NEXT:    addiw a1, a1, -1
+; RV64ID-LP64D-NEXT:    addi a1, a1, -1
 ; RV64ID-LP64D-NEXT:    and a0, a0, a1
 ; RV64ID-LP64D-NEXT:    fmv.x.w a2, fa1
 ; RV64ID-LP64D-NEXT:    and a1, a2, a1
diff --git a/llvm/test/CodeGen/RISCV/bittest.ll b/llvm/test/CodeGen/RISCV/bittest.ll
index d281c942773548b..f560a112dd92b47 100644
--- a/llvm/test/CodeGen/RISCV/bittest.ll
+++ b/llvm/test/CodeGen/RISCV/bittest.ll
@@ -266,7 +266,7 @@ define i1 @bittest_constant_by_var_shr_i32(i32 signext %b) nounwind {
 ; RV64I-LABEL: bittest_constant_by_var_shr_i32:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    lui a1, 301408
-; RV64I-NEXT:    addiw a1, a1, 722
+; RV64I-NEXT:    addi a1, a1, 722
 ; RV64I-NEXT:    srlw a0, a1, a0
 ; RV64I-NEXT:    andi a0, a0, 1
 ; RV64I-NEXT:    ret
@@ -296,7 +296,7 @@ define i1 @bittest_constant_by_var_shr_i32(i32 signext %b) nounwind {
 ; RV64XTHEADBS-LABEL: bittest_constant_by_var_shr_i32:
 ; RV64XTHEADBS:       # %bb.0:
 ; RV64XTHEADBS-NEXT:    lui a1, 301408
-; RV64XTHEADBS-NEXT:    addiw a1, a1, 722
+; RV64XTHEADBS-NEXT:    addi a1, a1, 722
 ; RV64XTHEADBS-NEXT:    srlw a0, a1, a0
 ; RV64XTHEADBS-NEXT:    andi a0, a0, 1
 ; RV64XTHEADBS-NEXT:    ret
@@ -319,7 +319,7 @@ define i1 @bittest_constant_by_var_shl_i32(i32 signext %b) nounwind {
 ; RV64I-LABEL: bittest_constant_by_var_shl_i32:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    lui a1, 301408
-; RV64I-NEXT:    addiw a1, a1, 722
+; RV64I-NEXT:    addi a1, a1, 722
 ; RV64I-NEXT:    srlw a0, a1, a0
 ; RV64I-NEXT:    andi a0, a0, 1
 ; RV64I-NEXT:    ret
@@ -349,7 +349,7 @@ define i1 @bittest_constant_by_var_shl_i32(i32 signext %b) nounwind {
 ; RV64XTHEADBS-LABEL: bittest_constant_by_var_shl_i32:
 ; RV64XTHEADBS:       # %bb.0:
 ; RV64XTHEADBS-NEXT:    lui a1, 301408
-; RV64XTHEADBS-NEXT:    addiw a1, a1, 722
+; RV64XTHEADBS-NEXT:    addi a1, a1, 722
 ; RV64XTHEADBS-NEXT:    srlw a0, a1, a0
 ; RV64XTHEADBS-NEXT:    andi a0, a0, 1
 ; RV64XTHEADBS-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
index d64fcbe57a85145..864dd8e529060c3 100644
--- a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
@@ -141,7 +141,7 @@ define i64 @test_bswap_i64(i64 %a) nounwind {
 ; RV64I-NEXT:    or a1, a3, a1
 ; RV64I-NEXT:    and a4, a0, a4
 ; RV64I-NEXT:    slli a4, a4, 24
-; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    srli a3, a0, 24
 ; RV64I-NEXT:    slli a3, a3, 32
 ; RV64I-NEXT:    or a3, a4, a3
 ; RV64I-NEXT:    and a2, a0, a2
@@ -497,7 +497,7 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
 ; RV64ZBB-NEXT:    and a1, a1, a2
 ; RV64ZBB-NEXT:    srli a0, a0, 28
 ; RV64ZBB-NEXT:    lui a2, 986895
-; RV64ZBB-NEXT:    addiw a2, a2, 240
+; RV64ZBB-NEXT:    addi a2, a2, 240
 ; RV64ZBB-NEXT:    and a0, a0, a2
 ; RV64ZBB-NEXT:    sext.w a0, a0
 ; RV64ZBB-NEXT:    or a0, a1, a0
@@ -613,7 +613,7 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
 ; RV64I-NEXT:    or a1, a3, a1
 ; RV64I-NEXT:    and a4, a0, a4
 ; RV64I-NEXT:    slli a4, a4, 24
-; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    srli a3, a0, 24
 ; RV64I-NEXT:    slli a3, a3, 32
 ; RV64I-NEXT:    or a3, a4, a3
 ; RV64I-NEXT:    and a2, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-half.ll b/llvm/test/CodeGen/RISCV/calling-conv-half.ll
index fc53f70186b763d..6587f0c8c5af7bf 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-half.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-half.ll
@@ -396,7 +396,7 @@ define i32 @caller_half_on_stack() nounwind {
 ; RV64IF-NEXT:    addi sp, sp, -16
 ; RV64IF-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64IF-NEXT:    lui a0, 1048565
-; RV64IF-NEXT:    addiw t0, a0, -1792
+; RV64IF-NEXT:    addi t0, a0, -1792
 ; RV64IF-NEXT:    li a0, 1
 ; RV64IF-NEXT:    li a1, 2
 ; RV64IF-NEXT:    li a2, 3
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index 699bb44645711df..b54bb55432437f2 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -58,7 +58,7 @@ define i8 @test_cttz_i8(i8 %a) nounwind {
 ; RV64NOZBB-NEXT:    andi a1, a0, 255
 ; RV64NOZBB-NEXT:    beqz a1, .LBB0_2
 ; RV64NOZBB-NEXT:  # %bb.1: # %cond.false
-; RV64NOZBB-NEXT:    addiw a1, a0, -1
+; RV64NOZBB-NEXT:    addi a1, a0, -1
 ; RV64NOZBB-NEXT:    not a0, a0
 ; RV64NOZBB-NEXT:    and a0, a0, a1
 ; RV64NOZBB-NEXT:    srli a1, a0, 1
@@ -308,7 +308,7 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
 ; RV64M-NEXT:    negw a1, a0
 ; RV64M-NEXT:    and a0, a0, a1
 ; RV64M-NEXT:    lui a1, 30667
-; RV64M-NEXT:    addiw a1, a1, 1329
+; RV64M-NEXT:    addi a1, a1, 1329
 ; RV64M-NEXT:    mul a0, a0, a1
 ; RV64M-NEXT:    srliw a0, a0, 27
 ; RV64M-NEXT:    lui a1, %hi(.LCPI2_0)
@@ -567,7 +567,7 @@ define i8 @test_cttz_i8_zero_undef(i8 %a) nounwind {
 ;
 ; RV64NOZBB-LABEL: test_cttz_i8_zero_undef:
 ; RV64NOZBB:       # %bb.0:
-; RV64NOZBB-NEXT:    addiw a1, a0, -1
+; RV64NOZBB-NEXT:    addi a1, a0, -1
 ; RV64NOZBB-NEXT:    not a0, a0
 ; RV64NOZBB-NEXT:    and a0, a0, a1
 ; RV64NOZBB-NEXT:    srli a1, a0, 1
@@ -753,7 +753,7 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind {
 ; RV64M-NEXT:    negw a1, a0
 ; RV64M-NEXT:    and a0, a0, a1
 ; RV64M-NEXT:    lui a1, 30667
-; RV64M-NEXT:    addiw a1, a1, 1329
+; RV64M-NEXT:    addi a1, a1, 1329
 ; RV64M-NEXT:    mul a0, a0, a1
 ; RV64M-NEXT:    srliw a0, a0, 27
 ; RV64M-NEXT:    lui a1, %hi(.LCPI6_0)
@@ -1315,10 +1315,10 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
 ; RV64M-NEXT:    srli a1, a0, 4
 ; RV64M-NEXT:    add a0, a0, a1
 ; RV64M-NEXT:    lui a1, 61681
-; RV64M-NEXT:    addiw a1, a1, -241
+; RV64M-NEXT:    addi a1, a1, -241
 ; RV64M-NEXT:    and a0, a0, a1
 ; RV64M-NEXT:    lui a1, 4112
-; RV64M-NEXT:    addiw a1, a1, 257
+; RV64M-NEXT:    addi a1, a1, 257
 ; RV64M-NEXT:    mul a0, a0, a1
 ; RV64M-NEXT:    srliw a0, a0, 24
 ; RV64M-NEXT:    ret
@@ -1969,10 +1969,10 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
 ; RV64M-NEXT:    srli a1, a0, 4
 ; RV64M-NEXT:    add a0, a0, a1
 ; RV64M-NEXT:    lui a1, 61681
-; RV64M-NEXT:    addiw a1, a1, -241
+; RV64M-NEXT:    addi a1, a1, -241
 ; RV64M-NEXT:    and a0, a0, a1
 ; RV64M-NEXT:    lui a1, 4112
-; RV64M-NEXT:    addiw a1, a1, 257
+; RV64M-NEXT:    addi a1, a1, 257
 ; RV64M-NEXT:    mul a0, a0, a1
 ; RV64M-NEXT:    srliw a0, a0, 24
 ; RV64M-NEXT:    ret
@@ -2558,10 +2558,10 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
 ; RV64M-NEXT:    srli a1, a0, 4
 ; RV64M-NEXT:    add a0, a0, a1
 ; RV64M-NEXT:    lui a1, 61681
-; RV64M-NEXT:    addiw a1, a1, -241
+; RV64M-NEXT:    addi a1, a1, -241
 ; RV64M-NEXT:    and a0, a0, a1
 ; RV64M-NEXT:    lui a1, 4112
-; RV64M-NEXT:    addiw a1, a1, 257
+; RV64M-NEXT:    addi a1, a1, 257
 ; RV64M-NEXT:    mul a0, a0, a1
 ; RV64M-NEXT:    srliw a0, a0, 24
 ; RV64M-NEXT:    ret
@@ -3072,7 +3072,7 @@ define i32 @test_parity_i32(i32 %a) {
 ; RV64NOZBB:       # %bb.0:
 ; RV64NOZBB-NEXT:    slli a1, a0, 32
 ; RV64NOZBB-NEXT:    srli a1, a1, 32
-; RV64NOZBB-NEXT:    srliw a0, a0, 16
+; RV64NOZBB-NEXT:    srli a0, a0, 16
 ; RV64NOZBB-NEXT:    xor a0, a1, a0
 ; RV64NOZBB-NEXT:    srli a1, a0, 8
 ; RV64NOZBB-NEXT:    xor a0, a0, a1
@@ -3115,7 +3115,7 @@ define i32 @test_parity_i32(i32 %a) {
 ; RV64XTHEADBB-LABEL: test_parity_i32:
 ; RV64XTHEADBB:       # %bb.0:
 ; RV64XTHEADBB-NEXT:    th.extu a1, a0, 31, 0
-; RV64XTHEADBB-NEXT:    srliw a0, a0, 16
+; RV64XTHEADBB-NEXT:    srli a0, a0, 16
 ; RV64XTHEADBB-NEXT:    xor a0, a1, a0
 ; RV64XTHEADBB-NEXT:    srli a1, a0, 8
 ; RV64XTHEADBB-NEXT:    xor a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
index be1a834e56c2297..02072b3e4e5ca82 100644
--- a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
+++ b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
@@ -98,7 +98,7 @@ define signext i32 @ctz_dereferencing_pointer(i64* %b) nounwind {
 ; RV64I-NEXT:    add a0, a1, a0
 ; RV64I-NEXT:    lbu a0, 0(a0)
 ; RV64I-NEXT:    seqz a1, s0
-; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    addi a1, a1, -1
 ; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    andi a0, a0, 63
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
@@ -177,7 +177,7 @@ define i64 @ctz_dereferencing_pointer_zext(i32* %b) nounwind {
 ; RV64I-NEXT:    add a0, a1, a0
 ; RV64I-NEXT:    lbu a0, 0(a0)
 ; RV64I-NEXT:    seqz a1, s0
-; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    addi a1, a1, -1
 ; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    andi a0, a0, 31
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
@@ -252,7 +252,7 @@ define signext i32 @ctz1(i32 signext %x) nounwind {
 ; RV64I-NEXT:    add a0, a1, a0
 ; RV64I-NEXT:    lbu a0, 0(a0)
 ; RV64I-NEXT:    seqz a1, s0
-; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    addi a1, a1, -1
 ; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    andi a0, a0, 31
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
@@ -325,7 +325,7 @@ define signext i32 @ctz1_flipped(i32 signext %x) nounwind {
 ; RV64I-NEXT:    add a0, a1, a0
 ; RV64I-NEXT:    lbu a0, 0(a0)
 ; RV64I-NEXT:    seqz a1, s0
-; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    addi a1, a1, -1
 ; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    andi a0, a0, 31
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
@@ -561,7 +561,7 @@ define signext i32 @ctz4(i64 %b) nounwind {
 ; RV64I-NEXT:    add a0, a1, a0
 ; RV64I-NEXT:    lbu a0, 0(a0)
 ; RV64I-NEXT:    seqz a1, s0
-; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    addi a1, a1, -1
 ; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    andi a0, a0, 63
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
@@ -804,7 +804,7 @@ define signext i32 @ctz5(i32 signext %x) nounwind {
 ; RV64I-NEXT:    add a0, a1, a0
 ; RV64I-NEXT:    lbu a0, 0(a0)
 ; RV64I-NEXT:    seqz a1, s0
-; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    addi a1, a1, -1
 ; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    andi a0, a0, 31
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
@@ -877,7 +877,7 @@ define signext i32 @ctz6(i32 signext %x) nounwind {
 ; RV64I-NEXT:    add a0, a1, a0
 ; RV64I-NEXT:    lbu a0, 0(a0)
 ; RV64I-NEXT:    seqz a1, s0
-; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    addi a1, a1, -1
 ; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    andi a0, a0, 31
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
@@ -956,7 +956,7 @@ define signext i32 @globalVar() nounwind {
 ; RV64I-NEXT:    add a0, a1, a0
 ; RV64I-NEXT:    lbu a0, 0(a0)
 ; RV64I-NEXT:    seqz a1, s0
-; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    addi a1, a1, -1
 ; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    andi a0, a0, 31
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/div-by-constant.ll b/llvm/test/CodeGen/RISCV/div-by-constant.ll
index 4672b9a7d9abb51..bf19bbd8b131454 100644
--- a/llvm/test/CodeGen/RISCV/div-by-constant.ll
+++ b/llvm/test/CodeGen/RISCV/div-by-constant.ll
@@ -24,7 +24,7 @@ define i32 @udiv_constant_no_add(i32 %a) nounwind {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    slli a0, a0, 32
 ; RV64-NEXT:    lui a1, 838861
-; RV64-NEXT:    addiw a1, a1, -819
+; RV64-NEXT:    addi a1, a1, -819
 ; RV64-NEXT:    slli a1, a1, 32
 ; RV64-NEXT:    mulhu a0, a0, a1
 ; RV64-NEXT:    srli a0, a0, 34
@@ -50,7 +50,7 @@ define i32 @udiv_constant_add(i32 %a) nounwind {
 ; RV64IM:       # %bb.0:
 ; RV64IM-NEXT:    slli a1, a0, 32
 ; RV64IM-NEXT:    lui a2, 149797
-; RV64IM-NEXT:    addiw a2, a2, -1755
+; RV64IM-NEXT:    addi a2, a2, -1755
 ; RV64IM-NEXT:    slli a2, a2, 32
 ; RV64IM-NEXT:    mulhu a1, a1, a2
 ; RV64IM-NEXT:    srli a1, a1, 32
diff --git a/llvm/test/CodeGen/RISCV/div-pow2.ll b/llvm/test/CodeGen/RISCV/div-pow2.ll
index 254e675b4ed8b5a..279fb6fd61d5555 100644
--- a/llvm/test/CodeGen/RISCV/div-pow2.ll
+++ b/llvm/test/CodeGen/RISCV/div-pow2.ll
@@ -14,7 +14,7 @@ define i32 @sdiv32_pow2_2(i32 %a) {
 ;
 ; RV64I-LABEL: sdiv32_pow2_2:
 ; RV64I:       # %bb.0: # %entry
-; RV64I-NEXT:    srliw a1, a0, 31
+; RV64I-NEXT:    srli a1, a0, 31
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    sraiw a0, a0, 1
 ; RV64I-NEXT:    ret
@@ -34,7 +34,7 @@ define i32 @sdiv32_pow2_negative_2(i32 %a) {
 ;
 ; RV64I-LABEL: sdiv32_pow2_negative_2:
 ; RV64I:       # %bb.0: # %entry
-; RV64I-NEXT:    srliw a1, a0, 31
+; RV64I-NEXT:    srli a1, a0, 31
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    sraiw a0, a0, 1
 ; RV64I-NEXT:    neg a0, a0
@@ -55,8 +55,8 @@ define i32 @sdiv32_pow2_2048(i32 %a) {
 ;
 ; RV64I-LABEL: sdiv32_pow2_2048:
 ; RV64I:       # %bb.0: # %entry
-; RV64I-NEXT:    sraiw a1, a0, 31
-; RV64I-NEXT:    srliw a1, a1, 21
+; RV64I-NEXT:    srai a1, a0, 31
+; RV64I-NEXT:    srli a1, a1, 21
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    sraiw a0, a0, 11
 ; RV64I-NEXT:    ret
@@ -77,8 +77,8 @@ define i32 @sdiv32_pow2_negative_2048(i32 %a) {
 ;
 ; RV64I-LABEL: sdiv32_pow2_negative_2048:
 ; RV64I:       # %bb.0: # %entry
-; RV64I-NEXT:    sraiw a1, a0, 31
-; RV64I-NEXT:    srliw a1, a1, 21
+; RV64I-NEXT:    srai a1, a0, 31
+; RV64I-NEXT:    srli a1, a1, 21
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    sraiw a0, a0, 11
 ; RV64I-NEXT:    neg a0, a0
@@ -99,8 +99,8 @@ define i32 @sdiv32_pow2_4096(i32 %a) {
 ;
 ; RV64I-LABEL: sdiv32_pow2_4096:
 ; RV64I:       # %bb.0: # %entry
-; RV64I-NEXT:    sraiw a1, a0, 31
-; RV64I-NEXT:    srliw a1, a1, 20
+; RV64I-NEXT:    srai a1, a0, 31
+; RV64I-NEXT:    srli a1, a1, 20
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    sraiw a0, a0, 12
 ; RV64I-NEXT:    ret
@@ -121,8 +121,8 @@ define i32 @sdiv32_pow2_negative_4096(i32 %a) {
 ;
 ; RV64I-LABEL: sdiv32_pow2_negative_4096:
 ; RV64I:       # %bb.0: # %entry
-; RV64I-NEXT:    sraiw a1, a0, 31
-; RV64I-NEXT:    srliw a1, a1, 20
+; RV64I-NEXT:    srai a1, a0, 31
+; RV64I-NEXT:    srli a1, a1, 20
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    sraiw a0, a0, 12
 ; RV64I-NEXT:    neg a0, a0
@@ -143,8 +143,8 @@ define i32 @sdiv32_pow2_65536(i32 %a) {
 ;
 ; RV64I-LABEL: sdiv32_pow2_65536:
 ; RV64I:       # %bb.0: # %entry
-; RV64I-NEXT:    sraiw a1, a0, 31
-; RV64I-NEXT:    srliw a1, a1, 16
+; RV64I-NEXT:    srai a1, a0, 31
+; RV64I-NEXT:    srli a1, a1, 16
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    sraiw a0, a0, 16
 ; RV64I-NEXT:    ret
@@ -165,8 +165,8 @@ define i32 @sdiv32_pow2_negative_65536(i32 %a) {
 ;
 ; RV64I-LABEL: sdiv32_pow2_negative_65536:
 ; RV64I:       # %bb.0: # %entry
-; RV64I-NEXT:    sraiw a1, a0, 31
-; RV64I-NEXT:    srliw a1, a1, 16
+; RV64I-NEXT:    srai a1, a0, 31
+; RV64I-NEXT:    srli a1, a1, 16
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    sraiw a0, a0, 16
 ; RV64I-NEXT:    neg a0, a0
diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll
index 2bde0349ccc7196..766742f40d02af4 100644
--- a/llvm/test/CodeGen/RISCV/div.ll
+++ b/llvm/test/CodeGen/RISCV/div.ll
@@ -69,7 +69,7 @@ define i32 @udiv_constant(i32 %a) nounwind {
 ; RV64IM:       # %bb.0:
 ; RV64IM-NEXT:    slli a0, a0, 32
 ; RV64IM-NEXT:    lui a1, 838861
-; RV64IM-NEXT:    addiw a1, a1, -819
+; RV64IM-NEXT:    addi a1, a1, -819
 ; RV64IM-NEXT:    slli a1, a1, 32
 ; RV64IM-NEXT:    mulhu a0, a0, a1
 ; RV64IM-NEXT:    srli a0, a0, 34
@@ -452,7 +452,7 @@ define i16 @udiv16(i16 %a, i16 %b) nounwind {
 ; RV64IM-LABEL: udiv16:
 ; RV64IM:       # %bb.0:
 ; RV64IM-NEXT:    lui a2, 16
-; RV64IM-NEXT:    addiw a2, a2, -1
+; RV64IM-NEXT:    addi a2, a2, -1
 ; RV64IM-NEXT:    and a1, a1, a2
 ; RV64IM-NEXT:    and a0, a0, a2
 ; RV64IM-NEXT:    divuw a0, a0, a1
@@ -667,16 +667,16 @@ define i32 @sdiv_pow2(i32 %a) nounwind {
 ;
 ; RV64I-LABEL: sdiv_pow2:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sraiw a1, a0, 31
-; RV64I-NEXT:    srliw a1, a1, 29
+; RV64I-NEXT:    srai a1, a0, 31
+; RV64I-NEXT:    srli a1, a1, 29
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    sraiw a0, a0, 3
 ; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: sdiv_pow2:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    sraiw a1, a0, 31
-; RV64IM-NEXT:    srliw a1, a1, 29
+; RV64IM-NEXT:    srai a1, a0, 31
+; RV64IM-NEXT:    srli a1, a1, 29
 ; RV64IM-NEXT:    add a0, a0, a1
 ; RV64IM-NEXT:    sraiw a0, a0, 3
 ; RV64IM-NEXT:    ret
@@ -703,16 +703,16 @@ define i32 @sdiv_pow2_2(i32 %a) nounwind {
 ;
 ; RV64I-LABEL: sdiv_pow2_2:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sraiw a1, a0, 31
-; RV64I-NEXT:    srliw a1, a1, 16
+; RV64I-NEXT:    srai a1, a0, 31
+; RV64I-NEXT:    srli a1, a1, 16
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    sraiw a0, a0, 16
 ; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: sdiv_pow2_2:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    sraiw a1, a0, 31
-; RV64IM-NEXT:    srliw a1, a1, 16
+; RV64IM-NEXT:    srai a1, a0, 31
+; RV64IM-NEXT:    srli a1, a1, 16
 ; RV64IM-NEXT:    add a0, a0, a1
 ; RV64IM-NEXT:    sraiw a0, a0, 16
 ; RV64IM-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll
index 1ecb2c24bb6f228..39ac963051b5b0b 100644
--- a/llvm/test/CodeGen/RISCV/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert.ll
@@ -394,7 +394,7 @@ define i32 @fcvt_wu_d_sat(double %a) nounwind {
 ; RV64IFD-NEXT:    fcvt.wu.d a0, fa0, rtz
 ; RV64IFD-NEXT:    feq.d a1, fa0, fa0
 ; RV64IFD-NEXT:    seqz a1, a1
-; RV64IFD-NEXT:    addiw a1, a1, -1
+; RV64IFD-NEXT:    addi a1, a1, -1
 ; RV64IFD-NEXT:    and a0, a0, a1
 ; RV64IFD-NEXT:    slli a0, a0, 32
 ; RV64IFD-NEXT:    srli a0, a0, 32
@@ -420,7 +420,7 @@ define i32 @fcvt_wu_d_sat(double %a) nounwind {
 ; RV64IZFINXZDINX-NEXT:    fcvt.wu.d a1, a0, rtz
 ; RV64IZFINXZDINX-NEXT:    feq.d a0, a0, a0
 ; RV64IZFINXZDINX-NEXT:    seqz a0, a0
-; RV64IZFINXZDINX-NEXT:    addiw a0, a0, -1
+; RV64IZFINXZDINX-NEXT:    addi a0, a0, -1
 ; RV64IZFINXZDINX-NEXT:    and a0, a1, a0
 ; RV64IZFINXZDINX-NEXT:    slli a0, a0, 32
 ; RV64IZFINXZDINX-NEXT:    srli a0, a0, 32
@@ -1891,20 +1891,20 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
 ; RV64I-NEXT:    lui s1, 1048568
 ; RV64I-NEXT:  .LBB26_2: # %start
 ; RV64I-NEXT:    lui a0, 4152
-; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    addi a0, a0, -1
 ; RV64I-NEXT:    slli a1, a0, 38
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __gtdf2@plt
 ; RV64I-NEXT:    blez a0, .LBB26_4
 ; RV64I-NEXT:  # %bb.3: # %start
 ; RV64I-NEXT:    lui s1, 8
-; RV64I-NEXT:    addiw s1, s1, -1
+; RV64I-NEXT:    addi s1, s1, -1
 ; RV64I-NEXT:  .LBB26_4: # %start
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __unorddf2@plt
 ; RV64I-NEXT:    snez a0, a0
-; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    addi a0, a0, -1
 ; RV64I-NEXT:    and a0, a0, s1
 ; RV64I-NEXT:    slli a0, a0, 48
 ; RV64I-NEXT:    srai a0, a0, 48
@@ -2074,7 +2074,7 @@ define zeroext i16 @fcvt_wu_s_sat_i16(double %a) nounwind {
 ; RV64I-NEXT:    call __fixunsdfdi@plt
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lui a0, 8312
-; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    addi a0, a0, -1
 ; RV64I-NEXT:    slli a1, a0, 37
 ; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call __gtdf2@plt
@@ -2298,7 +2298,7 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind {
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __unorddf2@plt
 ; RV64I-NEXT:    snez a0, a0
-; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    addi a0, a0, -1
 ; RV64I-NEXT:    and a0, a0, s1
 ; RV64I-NEXT:    slli a0, a0, 56
 ; RV64I-NEXT:    srai a0, a0, 56
@@ -2509,7 +2509,7 @@ define zeroext i32 @fcvt_wu_d_sat_zext(double %a) nounwind {
 ; RV64IFD-NEXT:    fcvt.wu.d a0, fa0, rtz
 ; RV64IFD-NEXT:    feq.d a1, fa0, fa0
 ; RV64IFD-NEXT:    seqz a1, a1
-; RV64IFD-NEXT:    addiw a1, a1, -1
+; RV64IFD-NEXT:    addi a1, a1, -1
 ; RV64IFD-NEXT:    and a0, a0, a1
 ; RV64IFD-NEXT:    slli a0, a0, 32
 ; RV64IFD-NEXT:    srli a0, a0, 32
@@ -2535,7 +2535,7 @@ define zeroext i32 @fcvt_wu_d_sat_zext(double %a) nounwind {
 ; RV64IZFINXZDINX-NEXT:    fcvt.wu.d a1, a0, rtz
 ; RV64IZFINXZDINX-NEXT:    feq.d a0, a0, a0
 ; RV64IZFINXZDINX-NEXT:    seqz a0, a0
-; RV64IZFINXZDINX-NEXT:    addiw a0, a0, -1
+; RV64IZFINXZDINX-NEXT:    addi a0, a0, -1
 ; RV64IZFINXZDINX-NEXT:    and a0, a1, a0
 ; RV64IZFINXZDINX-NEXT:    slli a0, a0, 32
 ; RV64IZFINXZDINX-NEXT:    srli a0, a0, 32
@@ -2735,13 +2735,13 @@ define signext i32 @fcvt_w_d_sat_sext(double %a) nounwind {
 ; RV64I-NEXT:    call __gtdf2@plt
 ; RV64I-NEXT:    blez a0, .LBB34_4
 ; RV64I-NEXT:  # %bb.3: # %start
-; RV64I-NEXT:    addiw s1, s3, -1
+; RV64I-NEXT:    addi s1, s3, -1
 ; RV64I-NEXT:  .LBB34_4: # %start
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __unorddf2@plt
 ; RV64I-NEXT:    snez a0, a0
-; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    addi a0, a0, -1
 ; RV64I-NEXT:    and a0, a0, s1
 ; RV64I-NEXT:    sext.w a0, a0
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
index 066b6fe9c534885..83a4f63add337fe 100644
--- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
+++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
@@ -69,7 +69,7 @@ define void @_Z3foov() {
 ; CHECK-NEXT:    vl2r.v v16, (a1) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vle16.v v16, (a0)
 ; CHECK-NEXT:    lui a0, 1048572
-; CHECK-NEXT:    addiw a0, a0, 928
+; CHECK-NEXT:    addi a0, a0, 928
 ; CHECK-NEXT:    vmsbc.vx v0, v8, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, tu, mu
 ; CHECK-NEXT:    csrr a0, vlenb
diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll
index 7224f5b79b7a112..235979b122215a6 100644
--- a/llvm/test/CodeGen/RISCV/float-convert.ll
+++ b/llvm/test/CodeGen/RISCV/float-convert.ll
@@ -243,7 +243,7 @@ define i32 @fcvt_wu_s_sat(float %a) nounwind {
 ; RV64IF-NEXT:    fcvt.wu.s a0, fa0, rtz
 ; RV64IF-NEXT:    feq.s a1, fa0, fa0
 ; RV64IF-NEXT:    seqz a1, a1
-; RV64IF-NEXT:    addiw a1, a1, -1
+; RV64IF-NEXT:    addi a1, a1, -1
 ; RV64IF-NEXT:    and a0, a0, a1
 ; RV64IF-NEXT:    slli a0, a0, 32
 ; RV64IF-NEXT:    srli a0, a0, 32
@@ -263,7 +263,7 @@ define i32 @fcvt_wu_s_sat(float %a) nounwind {
 ; RV64IZFINX-NEXT:    fcvt.wu.s a1, a0, rtz
 ; RV64IZFINX-NEXT:    feq.s a0, a0, a0
 ; RV64IZFINX-NEXT:    seqz a0, a0
-; RV64IZFINX-NEXT:    addiw a0, a0, -1
+; RV64IZFINX-NEXT:    addi a0, a0, -1
 ; RV64IZFINX-NEXT:    and a0, a1, a0
 ; RV64IZFINX-NEXT:    slli a0, a0, 32
 ; RV64IZFINX-NEXT:    srli a0, a0, 32
@@ -1528,13 +1528,13 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
 ; RV64I-NEXT:    blez a0, .LBB24_4
 ; RV64I-NEXT:  # %bb.3: # %start
 ; RV64I-NEXT:    lui s1, 8
-; RV64I-NEXT:    addiw s1, s1, -1
+; RV64I-NEXT:    addi s1, s1, -1
 ; RV64I-NEXT:  .LBB24_4: # %start
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __unordsf2@plt
 ; RV64I-NEXT:    snez a0, a0
-; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    addi a0, a0, -1
 ; RV64I-NEXT:    and a0, a0, s1
 ; RV64I-NEXT:    slli a0, a0, 48
 ; RV64I-NEXT:    srai a0, a0, 48
@@ -1874,7 +1874,7 @@ define signext i8 @fcvt_w_s_sat_i8(float %a) nounwind {
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __unordsf2@plt
 ; RV64I-NEXT:    snez a0, a0
-; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    addi a0, a0, -1
 ; RV64I-NEXT:    and a0, a0, s1
 ; RV64I-NEXT:    slli a0, a0, 56
 ; RV64I-NEXT:    srai a0, a0, 56
@@ -2057,7 +2057,7 @@ define zeroext i32 @fcvt_wu_s_sat_zext(float %a) nounwind {
 ; RV64IF-NEXT:    fcvt.wu.s a0, fa0, rtz
 ; RV64IF-NEXT:    feq.s a1, fa0, fa0
 ; RV64IF-NEXT:    seqz a1, a1
-; RV64IF-NEXT:    addiw a1, a1, -1
+; RV64IF-NEXT:    addi a1, a1, -1
 ; RV64IF-NEXT:    and a0, a0, a1
 ; RV64IF-NEXT:    slli a0, a0, 32
 ; RV64IF-NEXT:    srli a0, a0, 32
@@ -2077,7 +2077,7 @@ define zeroext i32 @fcvt_wu_s_sat_zext(float %a) nounwind {
 ; RV64IZFINX-NEXT:    fcvt.wu.s a1, a0, rtz
 ; RV64IZFINX-NEXT:    feq.s a0, a0, a0
 ; RV64IZFINX-NEXT:    seqz a0, a0
-; RV64IZFINX-NEXT:    addiw a0, a0, -1
+; RV64IZFINX-NEXT:    addi a0, a0, -1
 ; RV64IZFINX-NEXT:    and a0, a1, a0
 ; RV64IZFINX-NEXT:    slli a0, a0, 32
 ; RV64IZFINX-NEXT:    srli a0, a0, 32
@@ -2238,13 +2238,13 @@ define signext i32 @fcvt_w_s_sat_sext(float %a) nounwind {
 ; RV64I-NEXT:    call __gtsf2@plt
 ; RV64I-NEXT:    blez a0, .LBB32_4
 ; RV64I-NEXT:  # %bb.3: # %start
-; RV64I-NEXT:    addiw s1, s3, -1
+; RV64I-NEXT:    addi s1, s3, -1
 ; RV64I-NEXT:  .LBB32_4: # %start
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __unordsf2@plt
 ; RV64I-NEXT:    snez a0, a0
-; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    addi a0, a0, -1
 ; RV64I-NEXT:    and a0, a0, s1
 ; RV64I-NEXT:    sext.w a0, a0
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
index 434c9f05bd16f7c..5a6e0baf752d079 100644
--- a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
+++ b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
@@ -325,7 +325,7 @@ define dso_local void @inc_g_i32() nounwind {
 ; RV64I:       # %bb.0: # %entry
 ; RV64I-NEXT:    lui a0, %hi(g_4_i32)
 ; RV64I-NEXT:    lw a1, %lo(g_4_i32)(a0)
-; RV64I-NEXT:    addiw a1, a1, 1
+; RV64I-NEXT:    addi a1, a1, 1
 ; RV64I-NEXT:    sw a1, %lo(g_4_i32)(a0)
 ; RV64I-NEXT:    ret
 ;
@@ -334,7 +334,7 @@ define dso_local void @inc_g_i32() nounwind {
 ; RV64I-MEDIUM-NEXT:  .Lpcrel_hi8:
 ; RV64I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g_4_i32)
 ; RV64I-MEDIUM-NEXT:    lw a1, %pcrel_lo(.Lpcrel_hi8)(a0)
-; RV64I-MEDIUM-NEXT:    addiw a1, a1, 1
+; RV64I-MEDIUM-NEXT:    addi a1, a1, 1
 ; RV64I-MEDIUM-NEXT:    sw a1, %pcrel_lo(.Lpcrel_hi8)(a0)
 ; RV64I-MEDIUM-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll
index 4dadd6528300198..2d3f40e15fe4324 100644
--- a/llvm/test/CodeGen/RISCV/half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/half-convert.ll
@@ -1703,7 +1703,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
 ; RV64IZFH-NEXT:    fcvt.wu.h a0, fa0, rtz
 ; RV64IZFH-NEXT:    feq.h a1, fa0, fa0
 ; RV64IZFH-NEXT:    seqz a1, a1
-; RV64IZFH-NEXT:    addiw a1, a1, -1
+; RV64IZFH-NEXT:    addi a1, a1, -1
 ; RV64IZFH-NEXT:    and a0, a0, a1
 ; RV64IZFH-NEXT:    slli a0, a0, 32
 ; RV64IZFH-NEXT:    srli a0, a0, 32
@@ -1723,7 +1723,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
 ; RV64IDZFH-NEXT:    fcvt.wu.h a0, fa0, rtz
 ; RV64IDZFH-NEXT:    feq.h a1, fa0, fa0
 ; RV64IDZFH-NEXT:    seqz a1, a1
-; RV64IDZFH-NEXT:    addiw a1, a1, -1
+; RV64IDZFH-NEXT:    addi a1, a1, -1
 ; RV64IDZFH-NEXT:    and a0, a0, a1
 ; RV64IDZFH-NEXT:    slli a0, a0, 32
 ; RV64IDZFH-NEXT:    srli a0, a0, 32
@@ -1743,7 +1743,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
 ; RV64IZHINX-NEXT:    fcvt.wu.h a1, a0, rtz
 ; RV64IZHINX-NEXT:    feq.h a0, a0, a0
 ; RV64IZHINX-NEXT:    seqz a0, a0
-; RV64IZHINX-NEXT:    addiw a0, a0, -1
+; RV64IZHINX-NEXT:    addi a0, a0, -1
 ; RV64IZHINX-NEXT:    and a0, a1, a0
 ; RV64IZHINX-NEXT:    slli a0, a0, 32
 ; RV64IZHINX-NEXT:    srli a0, a0, 32
@@ -1763,7 +1763,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
 ; RV64IZDINXZHINX-NEXT:    fcvt.wu.h a1, a0, rtz
 ; RV64IZDINXZHINX-NEXT:    feq.h a0, a0, a0
 ; RV64IZDINXZHINX-NEXT:    seqz a0, a0
-; RV64IZDINXZHINX-NEXT:    addiw a0, a0, -1
+; RV64IZDINXZHINX-NEXT:    addi a0, a0, -1
 ; RV64IZDINXZHINX-NEXT:    and a0, a1, a0
 ; RV64IZDINXZHINX-NEXT:    slli a0, a0, 32
 ; RV64IZDINXZHINX-NEXT:    srli a0, a0, 32
@@ -1863,7 +1863,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
 ; RV64ID-LP64-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; RV64ID-LP64-NEXT:    feq.s a1, fa5, fa5
 ; RV64ID-LP64-NEXT:    seqz a1, a1
-; RV64ID-LP64-NEXT:    addiw a1, a1, -1
+; RV64ID-LP64-NEXT:    addi a1, a1, -1
 ; RV64ID-LP64-NEXT:    and a0, a0, a1
 ; RV64ID-LP64-NEXT:    slli a0, a0, 32
 ; RV64ID-LP64-NEXT:    srli a0, a0, 32
@@ -1893,7 +1893,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
 ; RV64ID-NEXT:    fcvt.wu.s a0, fa0, rtz
 ; RV64ID-NEXT:    feq.s a1, fa0, fa0
 ; RV64ID-NEXT:    seqz a1, a1
-; RV64ID-NEXT:    addiw a1, a1, -1
+; RV64ID-NEXT:    addi a1, a1, -1
 ; RV64ID-NEXT:    and a0, a0, a1
 ; RV64ID-NEXT:    slli a0, a0, 32
 ; RV64ID-NEXT:    srli a0, a0, 32
@@ -1917,7 +1917,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
 ; CHECK64-IZFHMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; CHECK64-IZFHMIN-NEXT:    feq.s a1, fa5, fa5
 ; CHECK64-IZFHMIN-NEXT:    seqz a1, a1
-; CHECK64-IZFHMIN-NEXT:    addiw a1, a1, -1
+; CHECK64-IZFHMIN-NEXT:    addi a1, a1, -1
 ; CHECK64-IZFHMIN-NEXT:    and a0, a0, a1
 ; CHECK64-IZFHMIN-NEXT:    slli a0, a0, 32
 ; CHECK64-IZFHMIN-NEXT:    srli a0, a0, 32
@@ -1939,7 +1939,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
 ; CHECK64-IZHINXMIN-NEXT:    fcvt.wu.s a1, a0, rtz
 ; CHECK64-IZHINXMIN-NEXT:    feq.s a0, a0, a0
 ; CHECK64-IZHINXMIN-NEXT:    seqz a0, a0
-; CHECK64-IZHINXMIN-NEXT:    addiw a0, a0, -1
+; CHECK64-IZHINXMIN-NEXT:    addi a0, a0, -1
 ; CHECK64-IZHINXMIN-NEXT:    and a0, a1, a0
 ; CHECK64-IZHINXMIN-NEXT:    slli a0, a0, 32
 ; CHECK64-IZHINXMIN-NEXT:    srli a0, a0, 32
@@ -1961,7 +1961,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
 ; CHECK64-IZDINXZHINXMIN-NEXT:    fcvt.wu.s a1, a0, rtz
 ; CHECK64-IZDINXZHINXMIN-NEXT:    feq.s a0, a0, a0
 ; CHECK64-IZDINXZHINXMIN-NEXT:    seqz a0, a0
-; CHECK64-IZDINXZHINXMIN-NEXT:    addiw a0, a0, -1
+; CHECK64-IZDINXZHINXMIN-NEXT:    addi a0, a0, -1
 ; CHECK64-IZDINXZHINXMIN-NEXT:    and a0, a1, a0
 ; CHECK64-IZDINXZHINXMIN-NEXT:    slli a0, a0, 32
 ; CHECK64-IZDINXZHINXMIN-NEXT:    srli a0, a0, 32
@@ -6551,13 +6551,13 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; RV64I-NEXT:    blez a0, .LBB32_4
 ; RV64I-NEXT:  # %bb.3: # %start
 ; RV64I-NEXT:    lui s1, 8
-; RV64I-NEXT:    addiw s1, s1, -1
+; RV64I-NEXT:    addi s1, s1, -1
 ; RV64I-NEXT:  .LBB32_4: # %start
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __unordsf2@plt
 ; RV64I-NEXT:    snez a0, a0
-; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    addi a0, a0, -1
 ; RV64I-NEXT:    and a0, a0, s1
 ; RV64I-NEXT:    slli a0, a0, 48
 ; RV64I-NEXT:    srai a0, a0, 48
@@ -7511,7 +7511,7 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind {
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __unordsf2@plt
 ; RV64I-NEXT:    snez a0, a0
-; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    addi a0, a0, -1
 ; RV64I-NEXT:    and a0, a0, s1
 ; RV64I-NEXT:    slli a0, a0, 56
 ; RV64I-NEXT:    srai a0, a0, 56
@@ -8128,7 +8128,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
 ; RV64IZFH-NEXT:    fcvt.wu.h a0, fa0, rtz
 ; RV64IZFH-NEXT:    feq.h a1, fa0, fa0
 ; RV64IZFH-NEXT:    seqz a1, a1
-; RV64IZFH-NEXT:    addiw a1, a1, -1
+; RV64IZFH-NEXT:    addi a1, a1, -1
 ; RV64IZFH-NEXT:    and a0, a0, a1
 ; RV64IZFH-NEXT:    slli a0, a0, 32
 ; RV64IZFH-NEXT:    srli a0, a0, 32
@@ -8148,7 +8148,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
 ; RV64IDZFH-NEXT:    fcvt.wu.h a0, fa0, rtz
 ; RV64IDZFH-NEXT:    feq.h a1, fa0, fa0
 ; RV64IDZFH-NEXT:    seqz a1, a1
-; RV64IDZFH-NEXT:    addiw a1, a1, -1
+; RV64IDZFH-NEXT:    addi a1, a1, -1
 ; RV64IDZFH-NEXT:    and a0, a0, a1
 ; RV64IDZFH-NEXT:    slli a0, a0, 32
 ; RV64IDZFH-NEXT:    srli a0, a0, 32
@@ -8168,7 +8168,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
 ; RV64IZHINX-NEXT:    fcvt.wu.h a1, a0, rtz
 ; RV64IZHINX-NEXT:    feq.h a0, a0, a0
 ; RV64IZHINX-NEXT:    seqz a0, a0
-; RV64IZHINX-NEXT:    addiw a0, a0, -1
+; RV64IZHINX-NEXT:    addi a0, a0, -1
 ; RV64IZHINX-NEXT:    and a0, a1, a0
 ; RV64IZHINX-NEXT:    slli a0, a0, 32
 ; RV64IZHINX-NEXT:    srli a0, a0, 32
@@ -8188,7 +8188,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
 ; RV64IZDINXZHINX-NEXT:    fcvt.wu.h a1, a0, rtz
 ; RV64IZDINXZHINX-NEXT:    feq.h a0, a0, a0
 ; RV64IZDINXZHINX-NEXT:    seqz a0, a0
-; RV64IZDINXZHINX-NEXT:    addiw a0, a0, -1
+; RV64IZDINXZHINX-NEXT:    addi a0, a0, -1
 ; RV64IZDINXZHINX-NEXT:    and a0, a1, a0
 ; RV64IZDINXZHINX-NEXT:    slli a0, a0, 32
 ; RV64IZDINXZHINX-NEXT:    srli a0, a0, 32
@@ -8290,7 +8290,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
 ; RV64ID-LP64-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; RV64ID-LP64-NEXT:    feq.s a1, fa5, fa5
 ; RV64ID-LP64-NEXT:    seqz a1, a1
-; RV64ID-LP64-NEXT:    addiw a1, a1, -1
+; RV64ID-LP64-NEXT:    addi a1, a1, -1
 ; RV64ID-LP64-NEXT:    and a0, a0, a1
 ; RV64ID-LP64-NEXT:    slli a0, a0, 32
 ; RV64ID-LP64-NEXT:    srli a0, a0, 32
@@ -8320,7 +8320,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
 ; RV64ID-NEXT:    fcvt.wu.s a0, fa0, rtz
 ; RV64ID-NEXT:    feq.s a1, fa0, fa0
 ; RV64ID-NEXT:    seqz a1, a1
-; RV64ID-NEXT:    addiw a1, a1, -1
+; RV64ID-NEXT:    addi a1, a1, -1
 ; RV64ID-NEXT:    and a0, a0, a1
 ; RV64ID-NEXT:    slli a0, a0, 32
 ; RV64ID-NEXT:    srli a0, a0, 32
@@ -8344,7 +8344,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
 ; CHECK64-IZFHMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
 ; CHECK64-IZFHMIN-NEXT:    feq.s a1, fa5, fa5
 ; CHECK64-IZFHMIN-NEXT:    seqz a1, a1
-; CHECK64-IZFHMIN-NEXT:    addiw a1, a1, -1
+; CHECK64-IZFHMIN-NEXT:    addi a1, a1, -1
 ; CHECK64-IZFHMIN-NEXT:    and a0, a0, a1
 ; CHECK64-IZFHMIN-NEXT:    slli a0, a0, 32
 ; CHECK64-IZFHMIN-NEXT:    srli a0, a0, 32
@@ -8366,7 +8366,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
 ; CHECK64-IZHINXMIN-NEXT:    fcvt.wu.s a1, a0, rtz
 ; CHECK64-IZHINXMIN-NEXT:    feq.s a0, a0, a0
 ; CHECK64-IZHINXMIN-NEXT:    seqz a0, a0
-; CHECK64-IZHINXMIN-NEXT:    addiw a0, a0, -1
+; CHECK64-IZHINXMIN-NEXT:    addi a0, a0, -1
 ; CHECK64-IZHINXMIN-NEXT:    and a0, a1, a0
 ; CHECK64-IZHINXMIN-NEXT:    slli a0, a0, 32
 ; CHECK64-IZHINXMIN-NEXT:    srli a0, a0, 32
@@ -8388,7 +8388,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
 ; CHECK64-IZDINXZHINXMIN-NEXT:    fcvt.wu.s a1, a0, rtz
 ; CHECK64-IZDINXZHINXMIN-NEXT:    feq.s a0, a0, a0
 ; CHECK64-IZDINXZHINXMIN-NEXT:    seqz a0, a0
-; CHECK64-IZDINXZHINXMIN-NEXT:    addiw a0, a0, -1
+; CHECK64-IZDINXZHINXMIN-NEXT:    addi a0, a0, -1
 ; CHECK64-IZDINXZHINXMIN-NEXT:    and a0, a1, a0
 ; CHECK64-IZDINXZHINXMIN-NEXT:    slli a0, a0, 32
 ; CHECK64-IZDINXZHINXMIN-NEXT:    srli a0, a0, 32
@@ -8518,13 +8518,13 @@ define signext i32 @fcvt_w_h_sat_sext(half %a) nounwind {
 ; RV64I-NEXT:    call __gtsf2@plt
 ; RV64I-NEXT:    blez a0, .LBB40_4
 ; RV64I-NEXT:  # %bb.3: # %start
-; RV64I-NEXT:    addiw s1, s3, -1
+; RV64I-NEXT:    addi s1, s3, -1
 ; RV64I-NEXT:  .LBB40_4: # %start
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __unordsf2@plt
 ; RV64I-NEXT:    snez a0, a0
-; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    addi a0, a0, -1
 ; RV64I-NEXT:    and a0, a0, s1
 ; RV64I-NEXT:    sext.w a0, a0
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
index 70308857b2631a2..55c30046366d761 100644
--- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
+++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
@@ -259,21 +259,13 @@ define ptr @offset_sh3add() {
 }
 
 define dso_local void @read_modify_write() local_unnamed_addr nounwind {
-; RV32-LABEL: read_modify_write:
-; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    lui a0, %hi(s+160)
-; RV32-NEXT:    lw a1, %lo(s+160)(a0)
-; RV32-NEXT:    addi a1, a1, 10
-; RV32-NEXT:    sw a1, %lo(s+160)(a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: read_modify_write:
-; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    lui a0, %hi(s+160)
-; RV64-NEXT:    lw a1, %lo(s+160)(a0)
-; RV64-NEXT:    addiw a1, a1, 10
-; RV64-NEXT:    sw a1, %lo(s+160)(a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: read_modify_write:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a0, %hi(s+160)
+; CHECK-NEXT:    lw a1, %lo(s+160)(a0)
+; CHECK-NEXT:    addi a1, a1, 10
+; CHECK-NEXT:    sw a1, %lo(s+160)(a0)
+; CHECK-NEXT:    ret
 entry:
   %x = load i32, ptr getelementptr inbounds (%struct.S, ptr @s, i32 0, i32 1), align 4
   %y = add i32 %x, 10
@@ -375,21 +367,13 @@ define void @store_sh3add() {
 }
 
 define dso_local void @rmw_addi_addi() nounwind {
-; RV32-LABEL: rmw_addi_addi:
-; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    lui a0, %hi(bar+3211)
-; RV32-NEXT:    lbu a1, %lo(bar+3211)(a0)
-; RV32-NEXT:    addi a1, a1, 10
-; RV32-NEXT:    sb a1, %lo(bar+3211)(a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: rmw_addi_addi:
-; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    lui a0, %hi(bar+3211)
-; RV64-NEXT:    lbu a1, %lo(bar+3211)(a0)
-; RV64-NEXT:    addiw a1, a1, 10
-; RV64-NEXT:    sb a1, %lo(bar+3211)(a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: rmw_addi_addi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a0, %hi(bar+3211)
+; CHECK-NEXT:    lbu a1, %lo(bar+3211)(a0)
+; CHECK-NEXT:    addi a1, a1, 10
+; CHECK-NEXT:    sb a1, %lo(bar+3211)(a0)
+; CHECK-NEXT:    ret
 entry:
   %0 = load i8, ptr getelementptr inbounds ([0 x i8], ptr @bar, i32 0, i64 3211)
   %1 = add i8 %0, 10
diff --git a/llvm/test/CodeGen/RISCV/iabs.ll b/llvm/test/CodeGen/RISCV/iabs.ll
index 036a5d49d441c2d..afbbb9548c85d05 100644
--- a/llvm/test/CodeGen/RISCV/iabs.ll
+++ b/llvm/test/CodeGen/RISCV/iabs.ll
@@ -170,7 +170,7 @@ define i32 @abs32(i32 %x) {
 ;
 ; RV64I-LABEL: abs32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srai a1, a0, 31
 ; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    subw a0, a0, a1
 ; RV64I-NEXT:    ret
@@ -201,7 +201,7 @@ define i32 @select_abs32(i32 %x) {
 ;
 ; RV64I-LABEL: select_abs32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srai a1, a0, 31
 ; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    subw a0, a0, a1
 ; RV64I-NEXT:    ret
@@ -482,7 +482,7 @@ define i64 @zext_abs32(i32 %x) {
 ;
 ; RV64I-LABEL: zext_abs32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srai a1, a0, 31
 ; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    subw a0, a0, a1
 ; RV64I-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll
index 738318e4bd6774a..e191933b42338aa 100644
--- a/llvm/test/CodeGen/RISCV/imm.ll
+++ b/llvm/test/CodeGen/RISCV/imm.ll
@@ -588,35 +588,35 @@ define i64 @imm64_6() nounwind {
 ; RV64I-LABEL: imm64_6:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    lui a0, 9321
-; RV64I-NEXT:    addiw a0, a0, -1329
+; RV64I-NEXT:    addi a0, a0, -1329
 ; RV64I-NEXT:    slli a0, a0, 35
 ; RV64I-NEXT:    ret
 ;
 ; RV64IZBA-LABEL: imm64_6:
 ; RV64IZBA:       # %bb.0:
 ; RV64IZBA-NEXT:    lui a0, 9321
-; RV64IZBA-NEXT:    addiw a0, a0, -1329
+; RV64IZBA-NEXT:    addi a0, a0, -1329
 ; RV64IZBA-NEXT:    slli a0, a0, 35
 ; RV64IZBA-NEXT:    ret
 ;
 ; RV64IZBB-LABEL: imm64_6:
 ; RV64IZBB:       # %bb.0:
 ; RV64IZBB-NEXT:    lui a0, 9321
-; RV64IZBB-NEXT:    addiw a0, a0, -1329
+; RV64IZBB-NEXT:    addi a0, a0, -1329
 ; RV64IZBB-NEXT:    slli a0, a0, 35
 ; RV64IZBB-NEXT:    ret
 ;
 ; RV64IZBS-LABEL: imm64_6:
 ; RV64IZBS:       # %bb.0:
 ; RV64IZBS-NEXT:    lui a0, 9321
-; RV64IZBS-NEXT:    addiw a0, a0, -1329
+; RV64IZBS-NEXT:    addi a0, a0, -1329
 ; RV64IZBS-NEXT:    slli a0, a0, 35
 ; RV64IZBS-NEXT:    ret
 ;
 ; RV64IXTHEADBB-LABEL: imm64_6:
 ; RV64IXTHEADBB:       # %bb.0:
 ; RV64IXTHEADBB-NEXT:    lui a0, 9321
-; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1329
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1329
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 35
 ; RV64IXTHEADBB-NEXT:    ret
   ret i64 1311768464867721216 ; 0x1234_5678_0000_0000
@@ -709,7 +709,7 @@ define i64 @imm64_8() nounwind {
 ; RV64IZBA-LABEL: imm64_8:
 ; RV64IZBA:       # %bb.0:
 ; RV64IZBA-NEXT:    lui a0, 596523
-; RV64IZBA-NEXT:    addiw a0, a0, 965
+; RV64IZBA-NEXT:    addi a0, a0, 965
 ; RV64IZBA-NEXT:    slli.uw a0, a0, 13
 ; RV64IZBA-NEXT:    addi a0, a0, -1347
 ; RV64IZBA-NEXT:    slli a0, a0, 12
@@ -2298,7 +2298,7 @@ define i64 @imm_12900936431479() {
 ; RV64IZBA-LABEL: imm_12900936431479:
 ; RV64IZBA:       # %bb.0:
 ; RV64IZBA-NEXT:    lui a0, 768956
-; RV64IZBA-NEXT:    addiw a0, a0, -1093
+; RV64IZBA-NEXT:    addi a0, a0, -1093
 ; RV64IZBA-NEXT:    slli.uw a0, a0, 12
 ; RV64IZBA-NEXT:    addi a0, a0, 1911
 ; RV64IZBA-NEXT:    ret
@@ -2353,7 +2353,7 @@ define i64 @imm_12900918536874() {
 ; RV64IZBA-LABEL: imm_12900918536874:
 ; RV64IZBA:       # %bb.0:
 ; RV64IZBA-NEXT:    lui a0, 768955
-; RV64IZBA-NEXT:    addiw a0, a0, -1365
+; RV64IZBA-NEXT:    addi a0, a0, -1365
 ; RV64IZBA-NEXT:    slli.uw a0, a0, 12
 ; RV64IZBA-NEXT:    addi a0, a0, -1366
 ; RV64IZBA-NEXT:    ret
@@ -2408,7 +2408,7 @@ define i64 @imm_12900925247761() {
 ; RV64IZBA-LABEL: imm_12900925247761:
 ; RV64IZBA:       # %bb.0:
 ; RV64IZBA-NEXT:    lui a0, 768955
-; RV64IZBA-NEXT:    addiw a0, a0, 273
+; RV64IZBA-NEXT:    addi a0, a0, 273
 ; RV64IZBA-NEXT:    slli.uw a0, a0, 12
 ; RV64IZBA-NEXT:    addi a0, a0, 273
 ; RV64IZBA-NEXT:    ret
@@ -3030,7 +3030,7 @@ define i64 @imm64_same_lo_hi_negative() nounwind {
 ; RV64IZBA-LABEL: imm64_same_lo_hi_negative:
 ; RV64IZBA:       # %bb.0:
 ; RV64IZBA-NEXT:    lui a0, 526344
-; RV64IZBA-NEXT:    addiw a0, a0, 128
+; RV64IZBA-NEXT:    addi a0, a0, 128
 ; RV64IZBA-NEXT:    slli a1, a0, 32
 ; RV64IZBA-NEXT:    add.uw a0, a0, a1
 ; RV64IZBA-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll b/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll
index bcca8064e97a2fb..9e7f2e9525d3b4b 100644
--- a/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll
+++ b/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll
@@ -264,7 +264,7 @@ define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
 ;
 ; RV64-LABEL: add_ultcmp_i16_i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addiw a0, a0, -128
+; RV64-NEXT:    addi a0, a0, -128
 ; RV64-NEXT:    slli a0, a0, 48
 ; RV64-NEXT:    srli a0, a0, 56
 ; RV64-NEXT:    sltiu a0, a0, 255
@@ -431,7 +431,7 @@ define i1 @add_ulecmp_i16_i8(i16 %x) nounwind {
 ;
 ; RV64-LABEL: add_ulecmp_i16_i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addiw a0, a0, -128
+; RV64-NEXT:    addi a0, a0, -128
 ; RV64-NEXT:    slli a0, a0, 48
 ; RV64-NEXT:    srli a0, a0, 56
 ; RV64-NEXT:    sltiu a0, a0, 255
@@ -457,7 +457,7 @@ define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
 ;
 ; RV64I-LABEL: add_ugecmp_i16_i8:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 128
+; RV64I-NEXT:    addi a0, a0, 128
 ; RV64I-NEXT:    slli a0, a0, 48
 ; RV64I-NEXT:    srli a0, a0, 48
 ; RV64I-NEXT:    sltiu a0, a0, 256
@@ -474,7 +474,7 @@ define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
 ;
 ; RV64ZBB-LABEL: add_ugecmp_i16_i8:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    addiw a0, a0, 128
+; RV64ZBB-NEXT:    addi a0, a0, 128
 ; RV64ZBB-NEXT:    zext.h a0, a0
 ; RV64ZBB-NEXT:    sltiu a0, a0, 256
 ; RV64ZBB-NEXT:    xori a0, a0, 1
@@ -645,7 +645,7 @@ define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind {
 ;
 ; RV64I-LABEL: add_ugtcmp_i16_i8:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 128
+; RV64I-NEXT:    addi a0, a0, 128
 ; RV64I-NEXT:    slli a0, a0, 48
 ; RV64I-NEXT:    srli a0, a0, 48
 ; RV64I-NEXT:    sltiu a0, a0, 256
@@ -662,7 +662,7 @@ define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind {
 ;
 ; RV64ZBB-LABEL: add_ugtcmp_i16_i8:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    addiw a0, a0, 128
+; RV64ZBB-NEXT:    addi a0, a0, 128
 ; RV64ZBB-NEXT:    zext.h a0, a0
 ; RV64ZBB-NEXT:    sltiu a0, a0, 256
 ; RV64ZBB-NEXT:    xori a0, a0, 1
@@ -751,7 +751,7 @@ define i1 @add_ugecmp_bad_i16_i8_cmp(i16 %x, i16 %y) nounwind {
 ; RV64ZBB-LABEL: add_ugecmp_bad_i16_i8_cmp:
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    zext.h a1, a1
-; RV64ZBB-NEXT:    addiw a0, a0, 128
+; RV64ZBB-NEXT:    addi a0, a0, 128
 ; RV64ZBB-NEXT:    zext.h a0, a0
 ; RV64ZBB-NEXT:    sltu a0, a0, a1
 ; RV64ZBB-NEXT:    xori a0, a0, 1
@@ -774,7 +774,7 @@ define i1 @add_ugecmp_bad_i8_i16(i16 %x) nounwind {
 ;
 ; RV64I-LABEL: add_ugecmp_bad_i8_i16:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 128
+; RV64I-NEXT:    addi a0, a0, 128
 ; RV64I-NEXT:    slli a0, a0, 48
 ; RV64I-NEXT:    srli a0, a0, 48
 ; RV64I-NEXT:    sltiu a0, a0, 128
@@ -791,7 +791,7 @@ define i1 @add_ugecmp_bad_i8_i16(i16 %x) nounwind {
 ;
 ; RV64ZBB-LABEL: add_ugecmp_bad_i8_i16:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    addiw a0, a0, 128
+; RV64ZBB-NEXT:    addi a0, a0, 128
 ; RV64ZBB-NEXT:    zext.h a0, a0
 ; RV64ZBB-NEXT:    sltiu a0, a0, 128
 ; RV64ZBB-NEXT:    xori a0, a0, 1
@@ -814,7 +814,7 @@ define i1 @add_ugecmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
 ;
 ; RV64I-LABEL: add_ugecmp_bad_i16_i8_c0notpoweroftwo:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 192
+; RV64I-NEXT:    addi a0, a0, 192
 ; RV64I-NEXT:    slli a0, a0, 48
 ; RV64I-NEXT:    srli a0, a0, 48
 ; RV64I-NEXT:    sltiu a0, a0, 256
@@ -831,7 +831,7 @@ define i1 @add_ugecmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
 ;
 ; RV64ZBB-LABEL: add_ugecmp_bad_i16_i8_c0notpoweroftwo:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    addiw a0, a0, 192
+; RV64ZBB-NEXT:    addi a0, a0, 192
 ; RV64ZBB-NEXT:    zext.h a0, a0
 ; RV64ZBB-NEXT:    sltiu a0, a0, 256
 ; RV64ZBB-NEXT:    xori a0, a0, 1
@@ -854,7 +854,7 @@ define i1 @add_ugecmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind {
 ;
 ; RV64I-LABEL: add_ugecmp_bad_i16_i8_c1notpoweroftwo:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 128
+; RV64I-NEXT:    addi a0, a0, 128
 ; RV64I-NEXT:    slli a0, a0, 48
 ; RV64I-NEXT:    srli a0, a0, 48
 ; RV64I-NEXT:    sltiu a0, a0, 768
@@ -871,7 +871,7 @@ define i1 @add_ugecmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind {
 ;
 ; RV64ZBB-LABEL: add_ugecmp_bad_i16_i8_c1notpoweroftwo:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    addiw a0, a0, 128
+; RV64ZBB-NEXT:    addi a0, a0, 128
 ; RV64ZBB-NEXT:    zext.h a0, a0
 ; RV64ZBB-NEXT:    sltiu a0, a0, 768
 ; RV64ZBB-NEXT:    xori a0, a0, 1
@@ -894,7 +894,7 @@ define i1 @add_ugecmp_bad_i16_i8_magic(i16 %x) nounwind {
 ;
 ; RV64I-LABEL: add_ugecmp_bad_i16_i8_magic:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 64
+; RV64I-NEXT:    addi a0, a0, 64
 ; RV64I-NEXT:    slli a0, a0, 48
 ; RV64I-NEXT:    srli a0, a0, 48
 ; RV64I-NEXT:    sltiu a0, a0, 256
@@ -911,7 +911,7 @@ define i1 @add_ugecmp_bad_i16_i8_magic(i16 %x) nounwind {
 ;
 ; RV64ZBB-LABEL: add_ugecmp_bad_i16_i8_magic:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    addiw a0, a0, 64
+; RV64ZBB-NEXT:    addi a0, a0, 64
 ; RV64ZBB-NEXT:    zext.h a0, a0
 ; RV64ZBB-NEXT:    sltiu a0, a0, 256
 ; RV64ZBB-NEXT:    xori a0, a0, 1
@@ -934,7 +934,7 @@ define i1 @add_ugecmp_bad_i16_i4(i16 %x) nounwind {
 ;
 ; RV64I-LABEL: add_ugecmp_bad_i16_i4:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 8
+; RV64I-NEXT:    addi a0, a0, 8
 ; RV64I-NEXT:    slli a0, a0, 48
 ; RV64I-NEXT:    srli a0, a0, 48
 ; RV64I-NEXT:    sltiu a0, a0, 16
@@ -951,7 +951,7 @@ define i1 @add_ugecmp_bad_i16_i4(i16 %x) nounwind {
 ;
 ; RV64ZBB-LABEL: add_ugecmp_bad_i16_i4:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    addiw a0, a0, 8
+; RV64ZBB-NEXT:    addi a0, a0, 8
 ; RV64ZBB-NEXT:    zext.h a0, a0
 ; RV64ZBB-NEXT:    sltiu a0, a0, 16
 ; RV64ZBB-NEXT:    xori a0, a0, 1
@@ -974,7 +974,7 @@ define i1 @add_ugecmp_bad_i24_i8(i24 %x) nounwind {
 ;
 ; RV64-LABEL: add_ugecmp_bad_i24_i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addiw a0, a0, 128
+; RV64-NEXT:    addi a0, a0, 128
 ; RV64-NEXT:    slli a0, a0, 40
 ; RV64-NEXT:    srli a0, a0, 40
 ; RV64-NEXT:    sltiu a0, a0, 256
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-throw.ll b/llvm/test/CodeGen/RISCV/machine-outliner-throw.ll
index 03419932d030a96..21254b6302038ac 100644
--- a/llvm/test/CodeGen/RISCV/machine-outliner-throw.ll
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-throw.ll
@@ -13,7 +13,7 @@ define i32 @func1(i32 %x) #0 {
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    mul a0, a0, a0
-; CHECK-NEXT:    addiw s0, a0, 1
+; CHECK-NEXT:    addi s0, a0, 1
 ; CHECK-NEXT:    li a0, 4
 ; CHECK-NEXT:    call __cxa_allocate_exception@plt
 ; CHECK-NEXT:    sw s0, 0(a0)
@@ -40,7 +40,7 @@ define i32 @func2(i32 %x) #0 {
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    mul a0, a0, a0
-; CHECK-NEXT:    addiw s0, a0, 1
+; CHECK-NEXT:    addi s0, a0, 1
 ; CHECK-NEXT:    li a0, 4
 ; CHECK-NEXT:    call __cxa_allocate_exception@plt
 ; CHECK-NEXT:    sw s0, 0(a0)
diff --git a/llvm/test/CodeGen/RISCV/memcpy.ll b/llvm/test/CodeGen/RISCV/memcpy.ll
index 932bd2e13d62a18..d831a2a002a3ede 100644
--- a/llvm/test/CodeGen/RISCV/memcpy.ll
+++ b/llvm/test/CodeGen/RISCV/memcpy.ll
@@ -167,7 +167,7 @@ define void @t2(ptr nocapture %C) nounwind {
 ; RV64-FAST-NEXT:    ld a2, %lo(.L.str2)(a1)
 ; RV64-FAST-NEXT:    sd a2, 0(a0)
 ; RV64-FAST-NEXT:    lui a2, 1156
-; RV64-FAST-NEXT:    addiw a2, a2, 332
+; RV64-FAST-NEXT:    addi a2, a2, 332
 ; RV64-FAST-NEXT:    addi a1, a1, %lo(.L.str2)
 ; RV64-FAST-NEXT:    ld a3, 24(a1)
 ; RV64-FAST-NEXT:    ld a4, 16(a1)
@@ -332,10 +332,10 @@ define void @t5(ptr nocapture %C) nounwind {
 ; RV64-FAST-LABEL: t5:
 ; RV64-FAST:       # %bb.0: # %entry
 ; RV64-FAST-NEXT:    lui a1, 1349
-; RV64-FAST-NEXT:    addiw a1, a1, 857
+; RV64-FAST-NEXT:    addi a1, a1, 857
 ; RV64-FAST-NEXT:    sw a1, 3(a0)
 ; RV64-FAST-NEXT:    lui a1, 365861
-; RV64-FAST-NEXT:    addiw a1, a1, -1980
+; RV64-FAST-NEXT:    addi a1, a1, -1980
 ; RV64-FAST-NEXT:    sw a1, 0(a0)
 ; RV64-FAST-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll
index 06be6cbd96410a2..9ebed2b430f14ce 100644
--- a/llvm/test/CodeGen/RISCV/neg-abs.ll
+++ b/llvm/test/CodeGen/RISCV/neg-abs.ll
@@ -27,14 +27,14 @@ define i32 @neg_abs32(i32 %x) {
 ;
 ; RV64I-LABEL: neg_abs32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srai a1, a0, 31
 ; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    subw a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: neg_abs32:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    sraiw a1, a0, 31
+; RV64ZBB-NEXT:    srai a1, a0, 31
 ; RV64ZBB-NEXT:    xor a0, a0, a1
 ; RV64ZBB-NEXT:    subw a0, a1, a0
 ; RV64ZBB-NEXT:    ret
@@ -59,14 +59,14 @@ define i32 @select_neg_abs32(i32 %x) {
 ;
 ; RV64I-LABEL: select_neg_abs32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srai a1, a0, 31
 ; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    subw a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: select_neg_abs32:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    sraiw a1, a0, 31
+; RV64ZBB-NEXT:    srai a1, a0, 31
 ; RV64ZBB-NEXT:    xor a0, a0, a1
 ; RV64ZBB-NEXT:    subw a0, a1, a0
 ; RV64ZBB-NEXT:    ret
@@ -177,7 +177,7 @@ define i32 @neg_abs32_multiuse(i32 %x, ptr %y) {
 ;
 ; RV64I-LABEL: neg_abs32_multiuse:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sraiw a2, a0, 31
+; RV64I-NEXT:    srai a2, a0, 31
 ; RV64I-NEXT:    xor a0, a0, a2
 ; RV64I-NEXT:    subw a2, a0, a2
 ; RV64I-NEXT:    negw a0, a2
diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
index 42f998e68bb6e71..7c3294fa81dcfe4 100644
--- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
@@ -551,7 +551,7 @@ define i1 @uaddo_i8_increment_noncanonical_1(i8 %x, ptr %p) {
 ;
 ; RV64-LABEL: uaddo_i8_increment_noncanonical_1:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addiw a2, a0, 1
+; RV64-NEXT:    addi a2, a0, 1
 ; RV64-NEXT:    andi a0, a2, 255
 ; RV64-NEXT:    seqz a0, a0
 ; RV64-NEXT:    sb a2, 0(a1)
@@ -594,7 +594,7 @@ define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, ptr %p) {
 ;
 ; RV64-LABEL: uaddo_i16_increment_noncanonical_3:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addiw a2, a0, 1
+; RV64-NEXT:    addi a2, a0, 1
 ; RV64-NEXT:    slli a0, a2, 48
 ; RV64-NEXT:    srli a0, a0, 48
 ; RV64-NEXT:    seqz a0, a0
@@ -672,7 +672,7 @@ define i1 @uaddo_i32_decrement_alt(i32 signext %x, ptr %p) {
 ; RV64-LABEL: uaddo_i32_decrement_alt:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    snez a2, a0
-; RV64-NEXT:    addiw a0, a0, -1
+; RV64-NEXT:    addi a0, a0, -1
 ; RV64-NEXT:    sw a0, 0(a1)
 ; RV64-NEXT:    mv a0, a2
 ; RV64-NEXT:    ret
@@ -915,7 +915,7 @@ define i1 @usubo_ult_constant_op1_i16(i16 %x, ptr %p) {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    slli a2, a0, 48
 ; RV64-NEXT:    srli a2, a2, 48
-; RV64-NEXT:    addiw a3, a0, -44
+; RV64-NEXT:    addi a3, a0, -44
 ; RV64-NEXT:    sltiu a0, a2, 44
 ; RV64-NEXT:    sh a3, 0(a1)
 ; RV64-NEXT:    ret
@@ -939,7 +939,7 @@ define i1 @usubo_ugt_constant_op1_i8(i8 %x, ptr %p) {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    andi a2, a0, 255
 ; RV64-NEXT:    sltiu a2, a2, 45
-; RV64-NEXT:    addiw a0, a0, -45
+; RV64-NEXT:    addi a0, a0, -45
 ; RV64-NEXT:    sb a0, 0(a1)
 ; RV64-NEXT:    mv a0, a2
 ; RV64-NEXT:    ret
@@ -962,7 +962,7 @@ define i1 @usubo_eq_constant1_op1_i32(i32 %x, ptr %p) {
 ; RV64-LABEL: usubo_eq_constant1_op1_i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    sext.w a2, a0
-; RV64-NEXT:    addiw a3, a0, -1
+; RV64-NEXT:    addi a3, a0, -1
 ; RV64-NEXT:    seqz a0, a2
 ; RV64-NEXT:    sw a3, 0(a1)
 ; RV64-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rem.ll b/llvm/test/CodeGen/RISCV/rem.ll
index 3d5fd3fd43317f2..0b239e6f10fdcac 100644
--- a/llvm/test/CodeGen/RISCV/rem.ll
+++ b/llvm/test/CodeGen/RISCV/rem.ll
@@ -123,8 +123,8 @@ define i32 @srem_pow2(i32 %a) nounwind {
 ;
 ; RV64I-LABEL: srem_pow2:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sraiw a1, a0, 31
-; RV64I-NEXT:    srliw a1, a1, 29
+; RV64I-NEXT:    srai a1, a0, 31
+; RV64I-NEXT:    srli a1, a1, 29
 ; RV64I-NEXT:    add a1, a0, a1
 ; RV64I-NEXT:    andi a1, a1, -8
 ; RV64I-NEXT:    subw a0, a0, a1
@@ -132,8 +132,8 @@ define i32 @srem_pow2(i32 %a) nounwind {
 ;
 ; RV64IM-LABEL: srem_pow2:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    sraiw a1, a0, 31
-; RV64IM-NEXT:    srliw a1, a1, 29
+; RV64IM-NEXT:    srai a1, a0, 31
+; RV64IM-NEXT:    srli a1, a1, 29
 ; RV64IM-NEXT:    add a1, a0, a1
 ; RV64IM-NEXT:    andi a1, a1, -8
 ; RV64IM-NEXT:    subw a0, a0, a1
@@ -165,8 +165,8 @@ define i32 @srem_pow2_2(i32 %a) nounwind {
 ;
 ; RV64I-LABEL: srem_pow2_2:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sraiw a1, a0, 31
-; RV64I-NEXT:    srliw a1, a1, 16
+; RV64I-NEXT:    srai a1, a0, 31
+; RV64I-NEXT:    srli a1, a1, 16
 ; RV64I-NEXT:    add a1, a0, a1
 ; RV64I-NEXT:    lui a2, 1048560
 ; RV64I-NEXT:    and a1, a1, a2
@@ -175,8 +175,8 @@ define i32 @srem_pow2_2(i32 %a) nounwind {
 ;
 ; RV64IM-LABEL: srem_pow2_2:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    sraiw a1, a0, 31
-; RV64IM-NEXT:    srliw a1, a1, 16
+; RV64IM-NEXT:    srai a1, a0, 31
+; RV64IM-NEXT:    srli a1, a1, 16
 ; RV64IM-NEXT:    add a1, a0, a1
 ; RV64IM-NEXT:    lui a2, 1048560
 ; RV64IM-NEXT:    and a1, a1, a2
@@ -578,7 +578,7 @@ define i16 @urem16(i16 %a, i16 %b) nounwind {
 ; RV64IM-LABEL: urem16:
 ; RV64IM:       # %bb.0:
 ; RV64IM-NEXT:    lui a2, 16
-; RV64IM-NEXT:    addiw a2, a2, -1
+; RV64IM-NEXT:    addi a2, a2, -1
 ; RV64IM-NEXT:    and a1, a1, a2
 ; RV64IM-NEXT:    and a0, a0, a2
 ; RV64IM-NEXT:    remuw a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
index 61477589f8455e5..c343ef5b451de8d 100644
--- a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
@@ -12,7 +12,7 @@ define void @test1(ptr nocapture noundef %a, i32 noundef signext %n) {
 ; CHECK-NEXT:  .LBB0_1: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    lw a2, 0(a0)
-; CHECK-NEXT:    addiw a2, a2, 4
+; CHECK-NEXT:    addi a2, a2, 4
 ; CHECK-NEXT:    sw a2, 0(a0)
 ; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    addi a0, a0, 4
@@ -62,9 +62,9 @@ define void @test2(ptr nocapture noundef %a, i32 noundef signext %n) {
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    lw a5, -4(a4)
 ; CHECK-NEXT:    lw a6, 0(a4)
-; CHECK-NEXT:    addiw a5, a5, 4
+; CHECK-NEXT:    addi a5, a5, 4
 ; CHECK-NEXT:    sw a5, -4(a4)
-; CHECK-NEXT:    addiw a6, a6, 4
+; CHECK-NEXT:    addi a6, a6, 4
 ; CHECK-NEXT:    sw a6, 0(a4)
 ; CHECK-NEXT:    addi a3, a3, 2
 ; CHECK-NEXT:    addi a4, a4, 8
@@ -75,7 +75,7 @@ define void @test2(ptr nocapture noundef %a, i32 noundef signext %n) {
 ; CHECK-NEXT:    slli a3, a3, 2
 ; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:    lw a1, 0(a0)
-; CHECK-NEXT:    addiw a1, a1, 4
+; CHECK-NEXT:    addi a1, a1, 4
 ; CHECK-NEXT:    sw a1, 0(a0)
 ; CHECK-NEXT:  .LBB1_7: # %for.cond.cleanup
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll b/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll
index f957d25f5c490e7..eb9d58ea1e16fb7 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll
@@ -9,10 +9,10 @@ define i32 @foo(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mul a0, a0, a0
-; CHECK-NEXT:    addiw a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, 1
 ; CHECK-NEXT:    mul a0, a0, a0
 ; CHECK-NEXT:    add a0, a0, a2
-; CHECK-NEXT:    addiw a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, 1
 ; CHECK-NEXT:    sllw a0, a0, a1
 ; CHECK-NEXT:    ret
   %b = mul i32 %x, %x
@@ -169,7 +169,7 @@ define signext i32 @andi_srliw(i32 signext %0, ptr %1, i32 signext %2) {
 ; CHECK-LABEL: andi_srliw:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    andi a3, a0, -8
-; CHECK-NEXT:    srliw a4, a0, 3
+; CHECK-NEXT:    srli a4, a0, 3
 ; CHECK-NEXT:    addw a0, a3, a2
 ; CHECK-NEXT:    sw a4, 0(a1)
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
index 1d495b11f614030..7392f5e1a20e3d6 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
@@ -1659,14 +1659,14 @@ define signext i32 @sext_addiw_zext(i32 zeroext %a) nounwind {
 define zeroext i32 @zext_addiw_aext(i32 %a) nounwind {
 ; RV64I-LABEL: zext_addiw_aext:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 7
+; RV64I-NEXT:    addi a0, a0, 7
 ; RV64I-NEXT:    slli a0, a0, 32
 ; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: zext_addiw_aext:
 ; RV64ZBA:       # %bb.0:
-; RV64ZBA-NEXT:    addiw a0, a0, 7
+; RV64ZBA-NEXT:    addi a0, a0, 7
 ; RV64ZBA-NEXT:    zext.w a0, a0
 ; RV64ZBA-NEXT:    ret
   %1 = add i32 %a, 7
@@ -1676,14 +1676,14 @@ define zeroext i32 @zext_addiw_aext(i32 %a) nounwind {
 define zeroext i32 @zext_addiw_sext(i32 signext %a) nounwind {
 ; RV64I-LABEL: zext_addiw_sext:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 8
+; RV64I-NEXT:    addi a0, a0, 8
 ; RV64I-NEXT:    slli a0, a0, 32
 ; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: zext_addiw_sext:
 ; RV64ZBA:       # %bb.0:
-; RV64ZBA-NEXT:    addiw a0, a0, 8
+; RV64ZBA-NEXT:    addi a0, a0, 8
 ; RV64ZBA-NEXT:    zext.w a0, a0
 ; RV64ZBA-NEXT:    ret
   %1 = add i32 %a, 8
@@ -1693,14 +1693,14 @@ define zeroext i32 @zext_addiw_sext(i32 signext %a) nounwind {
 define zeroext i32 @zext_addiw_zext(i32 zeroext %a) nounwind {
 ; RV64I-LABEL: zext_addiw_zext:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 9
+; RV64I-NEXT:    addi a0, a0, 9
 ; RV64I-NEXT:    slli a0, a0, 32
 ; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: zext_addiw_zext:
 ; RV64ZBA:       # %bb.0:
-; RV64ZBA-NEXT:    addiw a0, a0, 9
+; RV64ZBA-NEXT:    addi a0, a0, 9
 ; RV64ZBA-NEXT:    zext.w a0, a0
 ; RV64ZBA-NEXT:    ret
   %1 = add i32 %a, 9
@@ -1943,7 +1943,7 @@ define zeroext i32 @zext_sraiw_aext(i32 %a) nounwind {
 ;
 ; RV64ZBA-LABEL: zext_sraiw_aext:
 ; RV64ZBA:       # %bb.0:
-; RV64ZBA-NEXT:    sraiw a0, a0, 7
+; RV64ZBA-NEXT:    srai a0, a0, 7
 ; RV64ZBA-NEXT:    zext.w a0, a0
 ; RV64ZBA-NEXT:    ret
   %1 = ashr i32 %a, 7
@@ -1970,7 +1970,7 @@ define zeroext i32 @zext_sraiw_zext(i32 zeroext %a) nounwind {
 ;
 ; RV64ZBA-LABEL: zext_sraiw_zext:
 ; RV64ZBA:       # %bb.0:
-; RV64ZBA-NEXT:    sraiw a0, a0, 9
+; RV64ZBA-NEXT:    srai a0, a0, 9
 ; RV64ZBA-NEXT:    zext.w a0, a0
 ; RV64ZBA-NEXT:    ret
   %1 = ashr i32 %a, 9
diff --git a/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll b/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll
index 4a9d8b08a4b2f74..f38aa71fb158d00 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll
@@ -113,7 +113,7 @@ define signext i32 @test9(ptr %0, i64 %1) {
 ; RV64I-LABEL: test9:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    lui a2, 1
-; RV64I-NEXT:    addiw a2, a2, 1
+; RV64I-NEXT:    addi a2, a2, 1
 ; RV64I-NEXT:    addw a1, a1, a2
 ; RV64I-NEXT:    slli a1, a1, 2
 ; RV64I-NEXT:    add a0, a0, a1
@@ -133,7 +133,7 @@ define signext i32 @test10(ptr %0, i64 %1) {
 ; RV64I-LABEL: test10:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    lui a2, 30141
-; RV64I-NEXT:    addiw a2, a2, -747
+; RV64I-NEXT:    addi a2, a2, -747
 ; RV64I-NEXT:    subw a2, a2, a1
 ; RV64I-NEXT:    slli a2, a2, 2
 ; RV64I-NEXT:    add a0, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll b/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll
index eea04ae03f8d652..ab1691543c78af1 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll
@@ -8,10 +8,10 @@ define signext i32 @addw(i32 signext %s, i32 signext %n, i32 signext %k) nounwin
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    not a2, a0
 ; CHECK-NEXT:    add a2, a2, a1
-; CHECK-NEXT:    addiw a3, a0, 1
+; CHECK-NEXT:    addi a3, a0, 1
 ; CHECK-NEXT:    mul a3, a2, a3
 ; CHECK-NEXT:    subw a1, a1, a0
-; CHECK-NEXT:    addiw a1, a1, -2
+; CHECK-NEXT:    addi a1, a1, -2
 ; CHECK-NEXT:    slli a1, a1, 32
 ; CHECK-NEXT:    slli a2, a2, 32
 ; CHECK-NEXT:    mulhu a1, a2, a1
@@ -56,7 +56,7 @@ define signext i32 @subw(i32 signext %s, i32 signext %n, i32 signext %k) nounwin
 ; CHECK-NEXT:    add a3, a2, a1
 ; CHECK-NEXT:    mul a2, a3, a2
 ; CHECK-NEXT:    subw a1, a1, a0
-; CHECK-NEXT:    addiw a1, a1, -2
+; CHECK-NEXT:    addi a1, a1, -2
 ; CHECK-NEXT:    slli a1, a1, 32
 ; CHECK-NEXT:    slli a3, a3, 32
 ; CHECK-NEXT:    mulhu a1, a3, a1
diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
index 90a10d02636bd03..cdcadee3b82bf08 100644
--- a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
@@ -173,7 +173,7 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
 ;
 ; RV64XTHEADBB-LABEL: log2_ceil_i32:
 ; RV64XTHEADBB:       # %bb.0:
-; RV64XTHEADBB-NEXT:    addiw a0, a0, -1
+; RV64XTHEADBB-NEXT:    addi a0, a0, -1
 ; RV64XTHEADBB-NEXT:    not a0, a0
 ; RV64XTHEADBB-NEXT:    slli a0, a0, 32
 ; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
@@ -300,7 +300,7 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
 ;
 ; RV64XTHEADBB-LABEL: ctlz_lshr_i32:
 ; RV64XTHEADBB:       # %bb.0:
-; RV64XTHEADBB-NEXT:    srliw a0, a0, 1
+; RV64XTHEADBB-NEXT:    srli a0, a0, 1
 ; RV64XTHEADBB-NEXT:    not a0, a0
 ; RV64XTHEADBB-NEXT:    slli a0, a0, 32
 ; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
@@ -774,9 +774,9 @@ define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    srli a2, a0, 8
 ; RV64I-NEXT:    lui a3, 16
-; RV64I-NEXT:    addiw a3, a3, -256
+; RV64I-NEXT:    addi a3, a3, -256
 ; RV64I-NEXT:    and a2, a2, a3
-; RV64I-NEXT:    srliw a4, a0, 24
+; RV64I-NEXT:    srli a4, a0, 24
 ; RV64I-NEXT:    or a2, a2, a4
 ; RV64I-NEXT:    and a3, a0, a3
 ; RV64I-NEXT:    slli a3, a3, 8
@@ -817,7 +817,7 @@ define i64 @bswap_i64(i64 %a) {
 ; RV64I-NEXT:    or a1, a3, a1
 ; RV64I-NEXT:    and a4, a0, a4
 ; RV64I-NEXT:    slli a4, a4, 24
-; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    srli a3, a0, 24
 ; RV64I-NEXT:    slli a3, a3, 32
 ; RV64I-NEXT:    or a3, a4, a3
 ; RV64I-NEXT:    and a2, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll
index 07a4c093f06e68d..a7af8ab348e99e3 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll
@@ -30,7 +30,7 @@ define signext i32 @orcb32_knownbits(i32 signext %a) nounwind {
 ; RV64ZBB-NEXT:    lui a1, 1044480
 ; RV64ZBB-NEXT:    and a0, a0, a1
 ; RV64ZBB-NEXT:    lui a1, 2048
-; RV64ZBB-NEXT:    addiw a1, a1, 1
+; RV64ZBB-NEXT:    addi a1, a1, 1
 ; RV64ZBB-NEXT:    or a0, a0, a1
 ; RV64ZBB-NEXT:    orc.b a0, a0
 ; RV64ZBB-NEXT:    sext.w a0, a0
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
index d9afb7c00ce58f2..e12b7cc3dc01d35 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
@@ -278,7 +278,7 @@ define signext i32 @rori_i32_fshl(i32 signext %a) nounwind {
 define void @rori_i32_fshl_nosext(i32 signext %a, ptr %x) nounwind {
 ; RV64I-LABEL: rori_i32_fshl_nosext:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srliw a2, a0, 1
+; RV64I-NEXT:    srli a2, a0, 1
 ; RV64I-NEXT:    slli a0, a0, 31
 ; RV64I-NEXT:    or a0, a0, a2
 ; RV64I-NEXT:    sw a0, 0(a1)
@@ -315,7 +315,7 @@ define void @rori_i32_fshr_nosext(i32 signext %a, ptr %x) nounwind {
 ; RV64I-LABEL: rori_i32_fshr_nosext:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slli a2, a0, 1
-; RV64I-NEXT:    srliw a0, a0, 31
+; RV64I-NEXT:    srli a0, a0, 31
 ; RV64I-NEXT:    or a0, a0, a2
 ; RV64I-NEXT:    sw a0, 0(a1)
 ; RV64I-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 18f1574b5352679..ddbe4a2fc38e3f8 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -169,7 +169,7 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
 ;
 ; RV64ZBB-LABEL: log2_ceil_i32:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    addiw a0, a0, -1
+; RV64ZBB-NEXT:    addi a0, a0, -1
 ; RV64ZBB-NEXT:    clzw a0, a0
 ; RV64ZBB-NEXT:    li a1, 32
 ; RV64ZBB-NEXT:    sub a0, a1, a0
@@ -292,7 +292,7 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
 ;
 ; RV64ZBB-LABEL: ctlz_lshr_i32:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    srliw a0, a0, 1
+; RV64ZBB-NEXT:    srli a0, a0, 1
 ; RV64ZBB-NEXT:    clzw a0, a0
 ; RV64ZBB-NEXT:    ret
   %1 = lshr i32 %a, 1
@@ -768,11 +768,11 @@ define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind {
 define <2 x i1> @ctpop_v2i32_ult_two(<2 x i32> %a) nounwind {
 ; RV64I-LABEL: ctpop_v2i32_ult_two:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a2, a0, -1
+; RV64I-NEXT:    addi a2, a0, -1
 ; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    sext.w a0, a0
 ; RV64I-NEXT:    seqz a0, a0
-; RV64I-NEXT:    addiw a2, a1, -1
+; RV64I-NEXT:    addi a2, a1, -1
 ; RV64I-NEXT:    and a1, a1, a2
 ; RV64I-NEXT:    sext.w a1, a1
 ; RV64I-NEXT:    seqz a1, a1
@@ -793,11 +793,11 @@ define <2 x i1> @ctpop_v2i32_ult_two(<2 x i32> %a) nounwind {
 define <2 x i1> @ctpop_v2i32_ugt_one(<2 x i32> %a) nounwind {
 ; RV64I-LABEL: ctpop_v2i32_ugt_one:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a2, a0, -1
+; RV64I-NEXT:    addi a2, a0, -1
 ; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    sext.w a0, a0
 ; RV64I-NEXT:    snez a0, a0
-; RV64I-NEXT:    addiw a2, a1, -1
+; RV64I-NEXT:    addi a2, a1, -1
 ; RV64I-NEXT:    and a1, a1, a2
 ; RV64I-NEXT:    sext.w a1, a1
 ; RV64I-NEXT:    snez a1, a1
@@ -822,13 +822,13 @@ define <2 x i1> @ctpop_v2i32_eq_one(<2 x i32> %a) nounwind {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sext.w a2, a1
 ; RV64I-NEXT:    sext.w a3, a0
-; RV64I-NEXT:    addiw a4, a0, -1
+; RV64I-NEXT:    addi a4, a0, -1
 ; RV64I-NEXT:    and a0, a0, a4
 ; RV64I-NEXT:    sext.w a0, a0
 ; RV64I-NEXT:    seqz a0, a0
 ; RV64I-NEXT:    snez a3, a3
 ; RV64I-NEXT:    and a0, a3, a0
-; RV64I-NEXT:    addiw a3, a1, -1
+; RV64I-NEXT:    addi a3, a1, -1
 ; RV64I-NEXT:    and a1, a1, a3
 ; RV64I-NEXT:    sext.w a1, a1
 ; RV64I-NEXT:    seqz a1, a1
@@ -855,13 +855,13 @@ define <2 x i1> @ctpop_v2i32_ne_one(<2 x i32> %a) nounwind {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sext.w a2, a1
 ; RV64I-NEXT:    sext.w a3, a0
-; RV64I-NEXT:    addiw a4, a0, -1
+; RV64I-NEXT:    addi a4, a0, -1
 ; RV64I-NEXT:    and a0, a0, a4
 ; RV64I-NEXT:    sext.w a0, a0
 ; RV64I-NEXT:    snez a0, a0
 ; RV64I-NEXT:    seqz a3, a3
 ; RV64I-NEXT:    or a0, a3, a0
-; RV64I-NEXT:    addiw a3, a1, -1
+; RV64I-NEXT:    addi a3, a1, -1
 ; RV64I-NEXT:    and a1, a1, a3
 ; RV64I-NEXT:    sext.w a1, a1
 ; RV64I-NEXT:    snez a1, a1
@@ -1404,7 +1404,7 @@ declare i32 @llvm.abs.i32(i32, i1 immarg)
 define i32 @abs_i32(i32 %x) {
 ; RV64I-LABEL: abs_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srai a1, a0, 31
 ; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    subw a0, a0, a1
 ; RV64I-NEXT:    ret
@@ -1518,9 +1518,9 @@ define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    srli a2, a0, 8
 ; RV64I-NEXT:    lui a3, 16
-; RV64I-NEXT:    addiw a3, a3, -256
+; RV64I-NEXT:    addi a3, a3, -256
 ; RV64I-NEXT:    and a2, a2, a3
-; RV64I-NEXT:    srliw a4, a0, 24
+; RV64I-NEXT:    srli a4, a0, 24
 ; RV64I-NEXT:    or a2, a2, a4
 ; RV64I-NEXT:    and a3, a0, a3
 ; RV64I-NEXT:    slli a3, a3, 8
@@ -1562,7 +1562,7 @@ define i64 @bswap_i64(i64 %a) {
 ; RV64I-NEXT:    or a1, a3, a1
 ; RV64I-NEXT:    and a4, a0, a4
 ; RV64I-NEXT:    slli a4, a4, 24
-; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    srli a3, a0, 24
 ; RV64I-NEXT:    slli a3, a3, 32
 ; RV64I-NEXT:    or a3, a4, a3
 ; RV64I-NEXT:    and a2, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/rv64zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbkb.ll
index fa96c576017ba5a..dd6248233975a7e 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbkb.ll
@@ -303,7 +303,7 @@ define i64 @pack_i64_imm() {
 ; RV64ZBKB-LABEL: pack_i64_imm:
 ; RV64ZBKB:       # %bb.0:
 ; RV64ZBKB-NEXT:    lui a0, 65793
-; RV64ZBKB-NEXT:    addiw a0, a0, 16
+; RV64ZBKB-NEXT:    addi a0, a0, 16
 ; RV64ZBKB-NEXT:    pack a0, a0, a0
 ; RV64ZBKB-NEXT:    ret
   ret i64 1157442765409226768 ; 0x0101010101010101
diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll
index aa3d7b3fa8a7c87..3514fa66f5886d3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll
@@ -229,63 +229,34 @@ define <vscale x 64 x i8> @bitreverse_nxv64i8(<vscale x 64 x i8> %va) {
 declare <vscale x 64 x i8> @llvm.bitreverse.nxv64i8(<vscale x 64 x i8>)
 
 define <vscale x 1 x i16> @bitreverse_nxv1i16(<vscale x 1 x i16> %va) {
-; RV32-LABEL: bitreverse_nxv1i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: bitreverse_nxv1i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: bitreverse_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: bitreverse_nxv1i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -298,63 +269,34 @@ define <vscale x 1 x i16> @bitreverse_nxv1i16(<vscale x 1 x i16> %va) {
 declare <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16>)
 
 define <vscale x 2 x i16> @bitreverse_nxv2i16(<vscale x 2 x i16> %va) {
-; RV32-LABEL: bitreverse_nxv2i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: bitreverse_nxv2i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: bitreverse_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: bitreverse_nxv2i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -367,63 +309,34 @@ define <vscale x 2 x i16> @bitreverse_nxv2i16(<vscale x 2 x i16> %va) {
 declare <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16>)
 
 define <vscale x 4 x i16> @bitreverse_nxv4i16(<vscale x 4 x i16> %va) {
-; RV32-LABEL: bitreverse_nxv4i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: bitreverse_nxv4i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: bitreverse_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: bitreverse_nxv4i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -436,63 +349,34 @@ define <vscale x 4 x i16> @bitreverse_nxv4i16(<vscale x 4 x i16> %va) {
 declare <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16>)
 
 define <vscale x 8 x i16> @bitreverse_nxv8i16(<vscale x 8 x i16> %va) {
-; RV32-LABEL: bitreverse_nxv8i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 8
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 2
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: bitreverse_nxv8i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 8
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 2
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: bitreverse_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 2
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: bitreverse_nxv8i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -505,63 +389,34 @@ define <vscale x 8 x i16> @bitreverse_nxv8i16(<vscale x 8 x i16> %va) {
 declare <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16>)
 
 define <vscale x 16 x i16> @bitreverse_nxv16i16(<vscale x 16 x i16> %va) {
-; RV32-LABEL: bitreverse_nxv16i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 8
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 2
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: bitreverse_nxv16i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 8
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 2
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: bitreverse_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 2
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v12, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: bitreverse_nxv16i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -574,63 +429,34 @@ define <vscale x 16 x i16> @bitreverse_nxv16i16(<vscale x 16 x i16> %va) {
 declare <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16>)
 
 define <vscale x 32 x i16> @bitreverse_nxv32i16(<vscale x 32 x i16> %va) {
-; RV32-LABEL: bitreverse_nxv32i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: bitreverse_nxv32i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 8
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 2
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: bitreverse_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 2
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v16, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: bitreverse_nxv32i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -643,79 +469,42 @@ define <vscale x 32 x i16> @bitreverse_nxv32i16(<vscale x 32 x i16> %va) {
 declare <vscale x 32 x i16> @llvm.bitreverse.nxv32i16(<vscale x 32 x i16>)
 
 define <vscale x 1 x i32> @bitreverse_nxv1i32(<vscale x 1 x i32> %va) {
-; RV32-LABEL: bitreverse_nxv1i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsrl.vi v10, v8, 24
-; RV32-NEXT:    vor.vv v9, v9, v10
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsll.vi v10, v10, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: bitreverse_nxv1i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsrl.vi v10, v8, 24
-; RV64-NEXT:    vor.vv v9, v9, v10
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsll.vi v10, v10, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: bitreverse_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsll.vi v10, v10, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: bitreverse_nxv1i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -728,79 +517,42 @@ define <vscale x 1 x i32> @bitreverse_nxv1i32(<vscale x 1 x i32> %va) {
 declare <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32>)
 
 define <vscale x 2 x i32> @bitreverse_nxv2i32(<vscale x 2 x i32> %va) {
-; RV32-LABEL: bitreverse_nxv2i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsrl.vi v10, v8, 24
-; RV32-NEXT:    vor.vv v9, v9, v10
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsll.vi v10, v10, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: bitreverse_nxv2i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsrl.vi v10, v8, 24
-; RV64-NEXT:    vor.vv v9, v9, v10
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsll.vi v10, v10, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: bitreverse_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsll.vi v10, v10, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: bitreverse_nxv2i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -813,79 +565,42 @@ define <vscale x 2 x i32> @bitreverse_nxv2i32(<vscale x 2 x i32> %va) {
 declare <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32>)
 
 define <vscale x 4 x i32> @bitreverse_nxv4i32(<vscale x 4 x i32> %va) {
-; RV32-LABEL: bitreverse_nxv4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsrl.vi v12, v8, 24
-; RV32-NEXT:    vor.vv v10, v10, v12
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vsll.vi v12, v12, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 2
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: bitreverse_nxv4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsrl.vi v12, v8, 24
-; RV64-NEXT:    vor.vv v10, v10, v12
-; RV64-NEXT:    vand.vx v12, v8, a0
-; RV64-NEXT:    vsll.vi v12, v12, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 2
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: bitreverse_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsrl.vi v12, v8, 24
+; CHECK-NEXT:    vor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    vsll.vi v12, v12, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 2
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: bitreverse_nxv4i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -898,79 +613,42 @@ define <vscale x 4 x i32> @bitreverse_nxv4i32(<vscale x 4 x i32> %va) {
 declare <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32>)
 
 define <vscale x 8 x i32> @bitreverse_nxv8i32(<vscale x 8 x i32> %va) {
-; RV32-LABEL: bitreverse_nxv8i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vsrl.vi v16, v8, 24
-; RV32-NEXT:    vor.vv v12, v12, v16
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsll.vi v16, v16, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 2
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: bitreverse_nxv8i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsrl.vi v16, v8, 24
-; RV64-NEXT:    vor.vv v12, v12, v16
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsll.vi v16, v16, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 2
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: bitreverse_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vsrl.vi v16, v8, 24
+; CHECK-NEXT:    vor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vsll.vi v16, v16, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 2
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v12, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: bitreverse_nxv8i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -983,79 +661,42 @@ define <vscale x 8 x i32> @bitreverse_nxv8i32(<vscale x 8 x i32> %va) {
 declare <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32>)
 
 define <vscale x 16 x i32> @bitreverse_nxv16i32(<vscale x 16 x i32> %va) {
-; RV32-LABEL: bitreverse_nxv16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsrl.vi v24, v8, 24
-; RV32-NEXT:    vor.vv v16, v16, v24
-; RV32-NEXT:    vand.vx v24, v8, a0
-; RV32-NEXT:    vsll.vi v24, v24, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: bitreverse_nxv16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsrl.vi v24, v8, 24
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vand.vx v24, v8, a0
-; RV64-NEXT:    vsll.vi v24, v24, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v24
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 2
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: bitreverse_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vsrl.vi v24, v8, 24
+; CHECK-NEXT:    vor.vv v16, v16, v24
+; CHECK-NEXT:    vand.vx v24, v8, a0
+; CHECK-NEXT:    vsll.vi v24, v24, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v24
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 2
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v16, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: bitreverse_nxv16i32:
 ; CHECK-ZVBB:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
index ca2d7639f528e3f..7698f860589aaf1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
@@ -487,63 +487,34 @@ define <vscale x 64 x i8> @vp_bitreverse_nxv64i8_unmasked(<vscale x 64 x i8> %va
 declare <vscale x 1 x i16> @llvm.vp.bitreverse.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x i16> @vp_bitreverse_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv1i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv1i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -555,63 +526,34 @@ define <vscale x 1 x i16> @vp_bitreverse_nxv1i16(<vscale x 1 x i16> %va, <vscale
 }
 
 define <vscale x 1 x i16> @vp_bitreverse_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv1i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv1i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -627,63 +569,34 @@ define <vscale x 1 x i16> @vp_bitreverse_nxv1i16_unmasked(<vscale x 1 x i16> %va
 declare <vscale x 2 x i16> @llvm.vp.bitreverse.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
 
 define <vscale x 2 x i16> @vp_bitreverse_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv2i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv2i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -695,63 +608,34 @@ define <vscale x 2 x i16> @vp_bitreverse_nxv2i16(<vscale x 2 x i16> %va, <vscale
 }
 
 define <vscale x 2 x i16> @vp_bitreverse_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv2i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv2i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -767,63 +651,34 @@ define <vscale x 2 x i16> @vp_bitreverse_nxv2i16_unmasked(<vscale x 2 x i16> %va
 declare <vscale x 4 x i16> @llvm.vp.bitreverse.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
 
 define <vscale x 4 x i16> @vp_bitreverse_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv4i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv4i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -835,63 +690,34 @@ define <vscale x 4 x i16> @vp_bitreverse_nxv4i16(<vscale x 4 x i16> %va, <vscale
 }
 
 define <vscale x 4 x i16> @vp_bitreverse_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv4i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv4i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -907,63 +733,34 @@ define <vscale x 4 x i16> @vp_bitreverse_nxv4i16_unmasked(<vscale x 4 x i16> %va
 declare <vscale x 8 x i16> @llvm.vp.bitreverse.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x i16> @vp_bitreverse_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv8i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv8i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -975,63 +772,34 @@ define <vscale x 8 x i16> @vp_bitreverse_nxv8i16(<vscale x 8 x i16> %va, <vscale
 }
 
 define <vscale x 8 x i16> @vp_bitreverse_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv8i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 8
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 2
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv8i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 8
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 2
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 2
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1047,63 +815,34 @@ define <vscale x 8 x i16> @vp_bitreverse_nxv8i16_unmasked(<vscale x 8 x i16> %va
 declare <vscale x 16 x i16> @llvm.vp.bitreverse.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
 
 define <vscale x 16 x i16> @vp_bitreverse_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv16i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v12, v8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv16i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1115,63 +854,34 @@ define <vscale x 16 x i16> @vp_bitreverse_nxv16i16(<vscale x 16 x i16> %va, <vsc
 }
 
 define <vscale x 16 x i16> @vp_bitreverse_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv16i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 8
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 2
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv16i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 8
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 2
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 2
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v12, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1187,63 +897,34 @@ define <vscale x 16 x i16> @vp_bitreverse_nxv16i16_unmasked(<vscale x 16 x i16>
 declare <vscale x 32 x i16> @llvm.vp.bitreverse.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i1>, i32)
 
 define <vscale x 32 x i16> @vp_bitreverse_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv32i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv32i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1255,63 +936,34 @@ define <vscale x 32 x i16> @vp_bitreverse_nxv32i16(<vscale x 32 x i16> %va, <vsc
 }
 
 define <vscale x 32 x i16> @vp_bitreverse_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv32i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv32i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 8
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 2
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv32i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 2
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v16, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1327,79 +979,42 @@ define <vscale x 32 x i16> @vp_bitreverse_nxv32i16_unmasked(<vscale x 32 x i16>
 declare <vscale x 1 x i32> @llvm.vp.bitreverse.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x i32> @vp_bitreverse_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv1i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v9, v9, v10, v0.t
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v10, v10, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv1i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v9, v9, v10, v0.t
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v10, v10, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v10, v10, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1411,79 +1026,42 @@ define <vscale x 1 x i32> @vp_bitreverse_nxv1i32(<vscale x 1 x i32> %va, <vscale
 }
 
 define <vscale x 1 x i32> @vp_bitreverse_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv1i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsrl.vi v10, v8, 24
-; RV32-NEXT:    vor.vv v9, v9, v10
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsll.vi v10, v10, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv1i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsrl.vi v10, v8, 24
-; RV64-NEXT:    vor.vv v9, v9, v10
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsll.vi v10, v10, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv1i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsll.vi v10, v10, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i32_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1499,79 +1077,42 @@ define <vscale x 1 x i32> @vp_bitreverse_nxv1i32_unmasked(<vscale x 1 x i32> %va
 declare <vscale x 2 x i32> @llvm.vp.bitreverse.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
 
 define <vscale x 2 x i32> @vp_bitreverse_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv2i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v9, v9, v10, v0.t
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v10, v10, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv2i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v9, v9, v10, v0.t
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v10, v10, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v10, v10, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1583,79 +1124,42 @@ define <vscale x 2 x i32> @vp_bitreverse_nxv2i32(<vscale x 2 x i32> %va, <vscale
 }
 
 define <vscale x 2 x i32> @vp_bitreverse_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv2i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsrl.vi v10, v8, 24
-; RV32-NEXT:    vor.vv v9, v9, v10
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsll.vi v10, v10, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv2i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsrl.vi v10, v8, 24
-; RV64-NEXT:    vor.vv v9, v9, v10
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsll.vi v10, v10, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsll.vi v10, v10, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i32_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1671,79 +1175,42 @@ define <vscale x 2 x i32> @vp_bitreverse_nxv2i32_unmasked(<vscale x 2 x i32> %va
 declare <vscale x 4 x i32> @llvm.vp.bitreverse.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
 
 define <vscale x 4 x i32> @vp_bitreverse_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v10, v10, v12, v0.t
-; RV32-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v12, v12, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v10, v10, v12, v0.t
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v12, v12, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v10, v10, v12, v0.t
+; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v12, v12, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1755,79 +1222,42 @@ define <vscale x 4 x i32> @vp_bitreverse_nxv4i32(<vscale x 4 x i32> %va, <vscale
 }
 
 define <vscale x 4 x i32> @vp_bitreverse_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv4i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsrl.vi v12, v8, 24
-; RV32-NEXT:    vor.vv v10, v10, v12
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vsll.vi v12, v12, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 2
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv4i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsrl.vi v12, v8, 24
-; RV64-NEXT:    vor.vv v10, v10, v12
-; RV64-NEXT:    vand.vx v12, v8, a0
-; RV64-NEXT:    vsll.vi v12, v12, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 2
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsrl.vi v12, v8, 24
+; CHECK-NEXT:    vor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    vsll.vi v12, v12, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 2
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i32_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1843,79 +1273,42 @@ define <vscale x 4 x i32> @vp_bitreverse_nxv4i32_unmasked(<vscale x 4 x i32> %va
 declare <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x i32> @vp_bitreverse_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv8i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v12, v12, v16, v0.t
-; RV32-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v16, v16, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v12, v8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv8i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v12, v12, v16, v0.t
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v16, v16, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 8, v0.t
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v12, v12, v16, v0.t
+; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v16, v16, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1927,167 +1320,93 @@ define <vscale x 8 x i32> @vp_bitreverse_nxv8i32(<vscale x 8 x i32> %va, <vscale
 }
 
 define <vscale x 8 x i32> @vp_bitreverse_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv8i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vsrl.vi v16, v8, 24
-; RV32-NEXT:    vor.vv v12, v12, v16
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsll.vi v16, v16, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 2
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv8i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsrl.vi v16, v8, 24
-; RV64-NEXT:    vor.vv v12, v12, v16
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsll.vi v16, v16, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 2
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vsrl.vi v16, v8, 24
+; CHECK-NEXT:    vor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vsll.vi v16, v16, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 2
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v12, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i32_unmasked:
-; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
-; CHECK-ZVBB-NEXT:    ret
-  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
-  %v = call <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
-  ret <vscale x 8 x i32> %v
-}
-
-declare <vscale x 16 x i32> @llvm.vp.bitreverse.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
-
-define <vscale x 16 x i32> @vp_bitreverse_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV32-NEXT:    vsrl.vi v24, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v16, v16, v24, v0.t
-; RV32-NEXT:    vand.vx v24, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v24, v24, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsrl.vi v24, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v16, v16, v24, v0.t
-; RV64-NEXT:    vand.vx v24, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v24, v24, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV64-NEXT:    ret
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+declare <vscale x 16 x i32> @llvm.vp.bitreverse.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i32> @vp_bitreverse_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_bitreverse_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 8, v0.t
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v24, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v16, v16, v24, v0.t
+; CHECK-NEXT:    vand.vx v24, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v24, v24, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -2099,79 +1418,42 @@ define <vscale x 16 x i32> @vp_bitreverse_nxv16i32(<vscale x 16 x i32> %va, <vsc
 }
 
 define <vscale x 16 x i32> @vp_bitreverse_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv16i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsrl.vi v24, v8, 24
-; RV32-NEXT:    vor.vv v16, v16, v24
-; RV32-NEXT:    vand.vx v24, v8, a0
-; RV32-NEXT:    vsll.vi v24, v24, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv16i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsrl.vi v24, v8, 24
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vand.vx v24, v8, a0
-; RV64-NEXT:    vsll.vi v24, v24, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v24
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 2
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v16, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vsrl.vi v24, v8, 24
+; CHECK-NEXT:    vor.vv v16, v16, v24
+; CHECK-NEXT:    vand.vx v24, v8, a0
+; CHECK-NEXT:    vsll.vi v24, v24, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v24
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 2
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v16, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -3774,179 +3056,92 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64_unmasked(<vscale x 8 x i64> %va
 declare <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16>, <vscale x 64 x i1>, i32)
 
 define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv64i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 4
-; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; RV32-NEXT:    vmv1r.v v24, v0
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 3
-; RV32-NEXT:    add a1, sp, a1
-; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    srli a2, a1, 1
-; RV32-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
-; RV32-NEXT:    vslidedown.vx v0, v0, a2
-; RV32-NEXT:    slli a1, a1, 2
-; RV32-NEXT:    sub a2, a0, a1
-; RV32-NEXT:    sltu a3, a0, a2
-; RV32-NEXT:    addi a3, a3, -1
-; RV32-NEXT:    and a2, a3, a2
-; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v8, v16, 8, v0.t
-; RV32-NEXT:    vsll.vi v16, v16, 8, v0.t
-; RV32-NEXT:    vor.vv v16, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v8, v16, 4, v0.t
-; RV32-NEXT:    lui a2, 1
-; RV32-NEXT:    addi a2, a2, -241
-; RV32-NEXT:    vand.vx v8, v8, a2, v0.t
-; RV32-NEXT:    vand.vx v16, v16, a2, v0.t
-; RV32-NEXT:    vsll.vi v16, v16, 4, v0.t
-; RV32-NEXT:    vor.vv v16, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v8, v16, 2, v0.t
-; RV32-NEXT:    lui a3, 3
-; RV32-NEXT:    addi a3, a3, 819
-; RV32-NEXT:    vand.vx v8, v8, a3, v0.t
-; RV32-NEXT:    vand.vx v16, v16, a3, v0.t
-; RV32-NEXT:    vsll.vi v16, v16, 2, v0.t
-; RV32-NEXT:    vor.vv v16, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
-; RV32-NEXT:    lui a4, 5
-; RV32-NEXT:    addi a4, a4, 1365
-; RV32-NEXT:    vand.vx v8, v8, a4, v0.t
-; RV32-NEXT:    vand.vx v16, v16, a4, v0.t
-; RV32-NEXT:    vsll.vi v16, v16, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a5, sp, 16
-; RV32-NEXT:    vs8r.v v8, (a5) # Unknown-size Folded Spill
-; RV32-NEXT:    bltu a0, a1, .LBB46_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, a1
-; RV32-NEXT:  .LBB46_2:
-; RV32-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT:    vmv1r.v v0, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vand.vx v16, v16, a2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a2, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v16, v16, a3, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a3, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    vand.vx v16, v16, a4, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a4, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv64i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    .cfi_def_cfa_offset 16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
-; RV64-NEXT:    sub sp, sp, a1
-; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; RV64-NEXT:    vmv1r.v v24, v0
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    srli a2, a1, 1
-; RV64-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
-; RV64-NEXT:    vslidedown.vx v0, v0, a2
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    sub a2, a0, a1
-; RV64-NEXT:    sltu a3, a0, a2
-; RV64-NEXT:    addi a3, a3, -1
-; RV64-NEXT:    and a2, a3, a2
-; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v8, v16, 8, v0.t
-; RV64-NEXT:    vsll.vi v16, v16, 8, v0.t
-; RV64-NEXT:    vor.vv v16, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v8, v16, 4, v0.t
-; RV64-NEXT:    lui a2, 1
-; RV64-NEXT:    addiw a2, a2, -241
-; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
-; RV64-NEXT:    vsll.vi v16, v16, 4, v0.t
-; RV64-NEXT:    vor.vv v16, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v8, v16, 2, v0.t
-; RV64-NEXT:    lui a3, 3
-; RV64-NEXT:    addiw a3, a3, 819
-; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a3, v0.t
-; RV64-NEXT:    vsll.vi v16, v16, 2, v0.t
-; RV64-NEXT:    vor.vv v16, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v8, v16, 1, v0.t
-; RV64-NEXT:    lui a4, 5
-; RV64-NEXT:    addiw a4, a4, 1365
-; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a4, v0.t
-; RV64-NEXT:    vsll.vi v16, v16, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    addi a5, sp, 16
-; RV64-NEXT:    vs8r.v v8, (a5) # Unknown-size Folded Spill
-; RV64-NEXT:    bltu a0, a1, .LBB46_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a0, a1
-; RV64-NEXT:  .LBB46_2:
-; RV64-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT:    vmv1r.v v0, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 16
-; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a3, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a4, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV64-NEXT:    addi a0, sp, 16
-; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    addi sp, sp, 16
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv64i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    srli a2, a1, 1
+; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vx v0, v0, a2
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    sub a2, a0, a1
+; CHECK-NEXT:    sltu a3, a0, a2
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a2, a3, a2
+; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v8, v16, 8, v0.t
+; CHECK-NEXT:    vsll.vi v16, v16, 8, v0.t
+; CHECK-NEXT:    vor.vv v16, v16, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v16, 4, v0.t
+; CHECK-NEXT:    lui a2, 1
+; CHECK-NEXT:    addi a2, a2, -241
+; CHECK-NEXT:    vand.vx v8, v8, a2, v0.t
+; CHECK-NEXT:    vand.vx v16, v16, a2, v0.t
+; CHECK-NEXT:    vsll.vi v16, v16, 4, v0.t
+; CHECK-NEXT:    vor.vv v16, v8, v16, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v16, 2, v0.t
+; CHECK-NEXT:    lui a3, 3
+; CHECK-NEXT:    addi a3, a3, 819
+; CHECK-NEXT:    vand.vx v8, v8, a3, v0.t
+; CHECK-NEXT:    vand.vx v16, v16, a3, v0.t
+; CHECK-NEXT:    vsll.vi v16, v16, 2, v0.t
+; CHECK-NEXT:    vor.vv v16, v8, v16, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v16, 1, v0.t
+; CHECK-NEXT:    lui a4, 5
+; CHECK-NEXT:    addi a4, a4, 1365
+; CHECK-NEXT:    vand.vx v8, v8, a4, v0.t
+; CHECK-NEXT:    vand.vx v16, v16, a4, v0.t
+; CHECK-NEXT:    vsll.vi v16, v16, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    addi a5, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a5) # Unknown-size Folded Spill
+; CHECK-NEXT:    bltu a0, a1, .LBB46_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:  .LBB46_2:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vsrl.vi v16, v8, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT:    vand.vx v16, v16, a2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a2, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v16, v16, a3, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a3, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT:    vand.vx v16, v16, a4, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a4, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -3975,121 +3170,63 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vsc
 }
 
 define <vscale x 64 x i16> @vp_bitreverse_nxv64i16_unmasked(<vscale x 64 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv64i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 2
-; RV32-NEXT:    sub a2, a0, a1
-; RV32-NEXT:    sltu a3, a0, a2
-; RV32-NEXT:    addi a3, a3, -1
-; RV32-NEXT:    and a2, a3, a2
-; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v24, v16, 8
-; RV32-NEXT:    vsll.vi v16, v16, 8
-; RV32-NEXT:    vor.vv v16, v16, v24
-; RV32-NEXT:    vsrl.vi v24, v16, 4
-; RV32-NEXT:    lui a2, 1
-; RV32-NEXT:    addi a2, a2, -241
-; RV32-NEXT:    vand.vx v24, v24, a2
-; RV32-NEXT:    vand.vx v16, v16, a2
-; RV32-NEXT:    vsll.vi v16, v16, 4
-; RV32-NEXT:    vor.vv v16, v24, v16
-; RV32-NEXT:    vsrl.vi v24, v16, 2
-; RV32-NEXT:    lui a3, 3
-; RV32-NEXT:    addi a3, a3, 819
-; RV32-NEXT:    vand.vx v24, v24, a3
-; RV32-NEXT:    vand.vx v16, v16, a3
-; RV32-NEXT:    vsll.vi v16, v16, 2
-; RV32-NEXT:    vor.vv v16, v24, v16
-; RV32-NEXT:    vsrl.vi v24, v16, 1
-; RV32-NEXT:    lui a4, 5
-; RV32-NEXT:    addi a4, a4, 1365
-; RV32-NEXT:    vand.vx v24, v24, a4
-; RV32-NEXT:    vand.vx v16, v16, a4
-; RV32-NEXT:    vadd.vv v16, v16, v16
-; RV32-NEXT:    vor.vv v16, v24, v16
-; RV32-NEXT:    bltu a0, a1, .LBB47_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, a1
-; RV32-NEXT:  .LBB47_2:
-; RV32-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v24, v8, 8
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vsrl.vi v24, v8, 4
-; RV32-NEXT:    vand.vx v24, v24, a2
-; RV32-NEXT:    vand.vx v8, v8, a2
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 2
-; RV32-NEXT:    vand.vx v24, v24, a3
-; RV32-NEXT:    vand.vx v8, v8, a3
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 1
-; RV32-NEXT:    vand.vx v24, v24, a4
-; RV32-NEXT:    vand.vx v8, v8, a4
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v24, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv64i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    sub a2, a0, a1
-; RV64-NEXT:    sltu a3, a0, a2
-; RV64-NEXT:    addi a3, a3, -1
-; RV64-NEXT:    and a2, a3, a2
-; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v24, v16, 8
-; RV64-NEXT:    vsll.vi v16, v16, 8
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vi v24, v16, 4
-; RV64-NEXT:    lui a2, 1
-; RV64-NEXT:    addiw a2, a2, -241
-; RV64-NEXT:    vand.vx v24, v24, a2
-; RV64-NEXT:    vand.vx v16, v16, a2
-; RV64-NEXT:    vsll.vi v16, v16, 4
-; RV64-NEXT:    vor.vv v16, v24, v16
-; RV64-NEXT:    vsrl.vi v24, v16, 2
-; RV64-NEXT:    lui a3, 3
-; RV64-NEXT:    addiw a3, a3, 819
-; RV64-NEXT:    vand.vx v24, v24, a3
-; RV64-NEXT:    vand.vx v16, v16, a3
-; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vor.vv v16, v24, v16
-; RV64-NEXT:    vsrl.vi v24, v16, 1
-; RV64-NEXT:    lui a4, 5
-; RV64-NEXT:    addiw a4, a4, 1365
-; RV64-NEXT:    vand.vx v24, v24, a4
-; RV64-NEXT:    vand.vx v16, v16, a4
-; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vor.vv v16, v24, v16
-; RV64-NEXT:    bltu a0, a1, .LBB47_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a0, a1
-; RV64-NEXT:  .LBB47_2:
-; RV64-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v24, v8, 8
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v24
-; RV64-NEXT:    vsrl.vi v24, v8, 4
-; RV64-NEXT:    vand.vx v24, v24, a2
-; RV64-NEXT:    vand.vx v8, v8, a2
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v24, v8
-; RV64-NEXT:    vsrl.vi v24, v8, 2
-; RV64-NEXT:    vand.vx v24, v24, a3
-; RV64-NEXT:    vand.vx v8, v8, a3
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v24, v8
-; RV64-NEXT:    vsrl.vi v24, v8, 1
-; RV64-NEXT:    vand.vx v24, v24, a4
-; RV64-NEXT:    vand.vx v8, v8, a4
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v24, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv64i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    sub a2, a0, a1
+; CHECK-NEXT:    sltu a3, a0, a2
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a2, a3, a2
+; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v24, v16, 8
+; CHECK-NEXT:    vsll.vi v16, v16, 8
+; CHECK-NEXT:    vor.vv v16, v16, v24
+; CHECK-NEXT:    vsrl.vi v24, v16, 4
+; CHECK-NEXT:    lui a2, 1
+; CHECK-NEXT:    addi a2, a2, -241
+; CHECK-NEXT:    vand.vx v24, v24, a2
+; CHECK-NEXT:    vand.vx v16, v16, a2
+; CHECK-NEXT:    vsll.vi v16, v16, 4
+; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    vsrl.vi v24, v16, 2
+; CHECK-NEXT:    lui a3, 3
+; CHECK-NEXT:    addi a3, a3, 819
+; CHECK-NEXT:    vand.vx v24, v24, a3
+; CHECK-NEXT:    vand.vx v16, v16, a3
+; CHECK-NEXT:    vsll.vi v16, v16, 2
+; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    vsrl.vi v24, v16, 1
+; CHECK-NEXT:    lui a4, 5
+; CHECK-NEXT:    addi a4, a4, 1365
+; CHECK-NEXT:    vand.vx v24, v24, a4
+; CHECK-NEXT:    vand.vx v16, v16, a4
+; CHECK-NEXT:    vadd.vv v16, v16, v16
+; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    bltu a0, a1, .LBB47_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:  .LBB47_2:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v24, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v24
+; CHECK-NEXT:    vsrl.vi v24, v8, 4
+; CHECK-NEXT:    vand.vx v24, v24, a2
+; CHECK-NEXT:    vand.vx v8, v8, a2
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v24, v8
+; CHECK-NEXT:    vsrl.vi v24, v8, 2
+; CHECK-NEXT:    vand.vx v24, v24, a3
+; CHECK-NEXT:    vand.vx v8, v8, a3
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v24, v8
+; CHECK-NEXT:    vsrl.vi v24, v8, 1
+; CHECK-NEXT:    vand.vx v24, v24, a4
+; CHECK-NEXT:    vand.vx v8, v8, a4
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v24, v8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -4117,65 +3254,35 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16_unmasked(<vscale x 64 x i16>
 ; Test promotion.
 declare <vscale x 1 x i9> @llvm.vp.bitreverse.nxv1i9(<vscale x 1 x i9>, <vscale x 1 x i1>, i32)
 define <vscale x 1 x i9> @vp_bitreverse_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv1i9:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 7, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_nxv1i9:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 7, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_nxv1i9:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 7, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i9:
 ; CHECK-ZVBB:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll
index 884bb206a31ebe2..c55399b502471a3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll
@@ -119,37 +119,21 @@ define <vscale x 32 x i16> @bswap_nxv32i16(<vscale x 32 x i16> %va) {
 declare <vscale x 32 x i16> @llvm.bswap.nxv32i16(<vscale x 32 x i16>)
 
 define <vscale x 1 x i32> @bswap_nxv1i32(<vscale x 1 x i32> %va) {
-; RV32-LABEL: bswap_nxv1i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsrl.vi v10, v8, 24
-; RV32-NEXT:    vor.vv v9, v9, v10
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsll.vi v10, v10, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: bswap_nxv1i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsrl.vi v10, v8, 24
-; RV64-NEXT:    vor.vv v9, v9, v10
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsll.vi v10, v10, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: bswap_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsll.vi v10, v10, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVKB-LABEL: bswap_nxv1i32:
 ; CHECK-ZVKB:       # %bb.0:
@@ -162,37 +146,21 @@ define <vscale x 1 x i32> @bswap_nxv1i32(<vscale x 1 x i32> %va) {
 declare <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32>)
 
 define <vscale x 2 x i32> @bswap_nxv2i32(<vscale x 2 x i32> %va) {
-; RV32-LABEL: bswap_nxv2i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsrl.vi v10, v8, 24
-; RV32-NEXT:    vor.vv v9, v9, v10
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsll.vi v10, v10, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: bswap_nxv2i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsrl.vi v10, v8, 24
-; RV64-NEXT:    vor.vv v9, v9, v10
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsll.vi v10, v10, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: bswap_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsll.vi v10, v10, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVKB-LABEL: bswap_nxv2i32:
 ; CHECK-ZVKB:       # %bb.0:
@@ -205,37 +173,21 @@ define <vscale x 2 x i32> @bswap_nxv2i32(<vscale x 2 x i32> %va) {
 declare <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32>)
 
 define <vscale x 4 x i32> @bswap_nxv4i32(<vscale x 4 x i32> %va) {
-; RV32-LABEL: bswap_nxv4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsrl.vi v12, v8, 24
-; RV32-NEXT:    vor.vv v10, v10, v12
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vsll.vi v12, v12, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: bswap_nxv4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsrl.vi v12, v8, 24
-; RV64-NEXT:    vor.vv v10, v10, v12
-; RV64-NEXT:    vand.vx v12, v8, a0
-; RV64-NEXT:    vsll.vi v12, v12, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    ret
+; CHECK-LABEL: bswap_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsrl.vi v12, v8, 24
+; CHECK-NEXT:    vor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    vsll.vi v12, v12, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVKB-LABEL: bswap_nxv4i32:
 ; CHECK-ZVKB:       # %bb.0:
@@ -248,37 +200,21 @@ define <vscale x 4 x i32> @bswap_nxv4i32(<vscale x 4 x i32> %va) {
 declare <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32>)
 
 define <vscale x 8 x i32> @bswap_nxv8i32(<vscale x 8 x i32> %va) {
-; RV32-LABEL: bswap_nxv8i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vsrl.vi v16, v8, 24
-; RV32-NEXT:    vor.vv v12, v12, v16
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsll.vi v16, v16, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: bswap_nxv8i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsrl.vi v16, v8, 24
-; RV64-NEXT:    vor.vv v12, v12, v16
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsll.vi v16, v16, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    ret
+; CHECK-LABEL: bswap_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vsrl.vi v16, v8, 24
+; CHECK-NEXT:    vor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vsll.vi v16, v16, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVKB-LABEL: bswap_nxv8i32:
 ; CHECK-ZVKB:       # %bb.0:
@@ -291,37 +227,21 @@ define <vscale x 8 x i32> @bswap_nxv8i32(<vscale x 8 x i32> %va) {
 declare <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32>)
 
 define <vscale x 16 x i32> @bswap_nxv16i32(<vscale x 16 x i32> %va) {
-; RV32-LABEL: bswap_nxv16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsrl.vi v24, v8, 24
-; RV32-NEXT:    vor.vv v16, v16, v24
-; RV32-NEXT:    vand.vx v24, v8, a0
-; RV32-NEXT:    vsll.vi v24, v24, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: bswap_nxv16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsrl.vi v24, v8, 24
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vand.vx v24, v8, a0
-; RV64-NEXT:    vsll.vi v24, v24, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v24
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    ret
+; CHECK-LABEL: bswap_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vsrl.vi v24, v8, 24
+; CHECK-NEXT:    vor.vv v16, v16, v24
+; CHECK-NEXT:    vand.vx v24, v8, a0
+; CHECK-NEXT:    vsll.vi v24, v24, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v24
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVKB-LABEL: bswap_nxv16i32:
 ; CHECK-ZVKB:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
index 3c4ff5e7d1cefc3..6aac13a0bcbb873 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
@@ -251,37 +251,21 @@ define <vscale x 32 x i16> @vp_bswap_nxv32i16_unmasked(<vscale x 32 x i16> %va,
 declare <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x i32> @vp_bswap_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv1i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v9, v9, v10, v0.t
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v10, v10, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bswap_nxv1i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v9, v9, v10, v0.t
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v10, v10, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bswap_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v10, v10, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVKB-LABEL: vp_bswap_nxv1i32:
 ; CHECK-ZVKB:       # %bb.0:
@@ -293,37 +277,21 @@ define <vscale x 1 x i32> @vp_bswap_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1
 }
 
 define <vscale x 1 x i32> @vp_bswap_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv1i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsrl.vi v10, v8, 24
-; RV32-NEXT:    vor.vv v9, v9, v10
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsll.vi v10, v10, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bswap_nxv1i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsrl.vi v10, v8, 24
-; RV64-NEXT:    vor.vv v9, v9, v10
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsll.vi v10, v10, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bswap_nxv1i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsll.vi v10, v10, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVKB-LABEL: vp_bswap_nxv1i32_unmasked:
 ; CHECK-ZVKB:       # %bb.0:
@@ -339,37 +307,21 @@ define <vscale x 1 x i32> @vp_bswap_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32
 declare <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
 
 define <vscale x 2 x i32> @vp_bswap_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv2i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v9, v9, v10, v0.t
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v10, v10, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bswap_nxv2i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v9, v9, v10, v0.t
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v10, v10, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bswap_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v10, v10, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVKB-LABEL: vp_bswap_nxv2i32:
 ; CHECK-ZVKB:       # %bb.0:
@@ -381,37 +333,21 @@ define <vscale x 2 x i32> @vp_bswap_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2
 }
 
 define <vscale x 2 x i32> @vp_bswap_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv2i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsrl.vi v10, v8, 24
-; RV32-NEXT:    vor.vv v9, v9, v10
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsll.vi v10, v10, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bswap_nxv2i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsrl.vi v10, v8, 24
-; RV64-NEXT:    vor.vv v9, v9, v10
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsll.vi v10, v10, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bswap_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsll.vi v10, v10, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVKB-LABEL: vp_bswap_nxv2i32_unmasked:
 ; CHECK-ZVKB:       # %bb.0:
@@ -427,37 +363,21 @@ define <vscale x 2 x i32> @vp_bswap_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32
 declare <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
 
 define <vscale x 4 x i32> @vp_bswap_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v10, v10, v12, v0.t
-; RV32-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v12, v12, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bswap_nxv4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v10, v10, v12, v0.t
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v12, v12, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bswap_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v10, v10, v12, v0.t
+; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v12, v12, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVKB-LABEL: vp_bswap_nxv4i32:
 ; CHECK-ZVKB:       # %bb.0:
@@ -469,37 +389,21 @@ define <vscale x 4 x i32> @vp_bswap_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4
 }
 
 define <vscale x 4 x i32> @vp_bswap_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv4i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsrl.vi v12, v8, 24
-; RV32-NEXT:    vor.vv v10, v10, v12
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vsll.vi v12, v12, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bswap_nxv4i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsrl.vi v12, v8, 24
-; RV64-NEXT:    vor.vv v10, v10, v12
-; RV64-NEXT:    vand.vx v12, v8, a0
-; RV64-NEXT:    vsll.vi v12, v12, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bswap_nxv4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsrl.vi v12, v8, 24
+; CHECK-NEXT:    vor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    vsll.vi v12, v12, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVKB-LABEL: vp_bswap_nxv4i32_unmasked:
 ; CHECK-ZVKB:       # %bb.0:
@@ -515,37 +419,21 @@ define <vscale x 4 x i32> @vp_bswap_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32
 declare <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x i32> @vp_bswap_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv8i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v12, v12, v16, v0.t
-; RV32-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v16, v16, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bswap_nxv8i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v12, v12, v16, v0.t
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v16, v16, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bswap_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 8, v0.t
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v12, v12, v16, v0.t
+; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v16, v16, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVKB-LABEL: vp_bswap_nxv8i32:
 ; CHECK-ZVKB:       # %bb.0:
@@ -557,37 +445,21 @@ define <vscale x 8 x i32> @vp_bswap_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8
 }
 
 define <vscale x 8 x i32> @vp_bswap_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv8i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vsrl.vi v16, v8, 24
-; RV32-NEXT:    vor.vv v12, v12, v16
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsll.vi v16, v16, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bswap_nxv8i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsrl.vi v16, v8, 24
-; RV64-NEXT:    vor.vv v12, v12, v16
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsll.vi v16, v16, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bswap_nxv8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vsrl.vi v16, v8, 24
+; CHECK-NEXT:    vor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vsll.vi v16, v16, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVKB-LABEL: vp_bswap_nxv8i32_unmasked:
 ; CHECK-ZVKB:       # %bb.0:
@@ -603,37 +475,21 @@ define <vscale x 8 x i32> @vp_bswap_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32
 declare <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
 
 define <vscale x 16 x i32> @vp_bswap_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV32-NEXT:    vsrl.vi v24, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v16, v16, v24, v0.t
-; RV32-NEXT:    vand.vx v24, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v24, v24, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bswap_nxv16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsrl.vi v24, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v16, v16, v24, v0.t
-; RV64-NEXT:    vand.vx v24, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v24, v24, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bswap_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 8, v0.t
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v24, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v16, v16, v24, v0.t
+; CHECK-NEXT:    vand.vx v24, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v24, v24, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVKB-LABEL: vp_bswap_nxv16i32:
 ; CHECK-ZVKB:       # %bb.0:
@@ -645,37 +501,21 @@ define <vscale x 16 x i32> @vp_bswap_nxv16i32(<vscale x 16 x i32> %va, <vscale x
 }
 
 define <vscale x 16 x i32> @vp_bswap_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv16i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsrl.vi v24, v8, 24
-; RV32-NEXT:    vor.vv v16, v16, v24
-; RV32-NEXT:    vand.vx v24, v8, a0
-; RV32-NEXT:    vsll.vi v24, v24, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bswap_nxv16i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsrl.vi v24, v8, 24
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vand.vx v24, v8, a0
-; RV64-NEXT:    vsll.vi v24, v24, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v24
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bswap_nxv16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vsrl.vi v24, v8, 24
+; CHECK-NEXT:    vor.vv v16, v16, v24
+; CHECK-NEXT:    vand.vx v24, v8, a0
+; CHECK-NEXT:    vsll.vi v24, v24, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v24
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVKB-LABEL: vp_bswap_nxv16i32_unmasked:
 ; CHECK-ZVKB:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll
index a128adc8de619ce..46bedcd4e966628 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll
@@ -173,21 +173,13 @@ define <vscale x 2 x i64> @vselect_add_const_nxv2i64(<vscale x 2 x i64> %a0) {
 }
 
 define <2 x i16> @vselect_add_const_signbit_v2i16(<2 x i16> %a0) {
-; RV32-LABEL: vselect_add_const_signbit_v2i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 8
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT:    vssubu.vx v8, v8, a0
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vselect_add_const_signbit_v2i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 8
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT:    vssubu.vx v8, v8, a0
-; RV64-NEXT:    ret
+; CHECK-LABEL: vselect_add_const_signbit_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
   %cmp = icmp ugt <2 x i16> %a0, <i16 32766, i16 32766>
   %v1 = add <2 x i16> %a0, <i16 -32767, i16 -32767>
   %v2 = select <2 x i1> %cmp, <2 x i16> %v1, <2 x i16> zeroinitializer
@@ -195,21 +187,13 @@ define <2 x i16> @vselect_add_const_signbit_v2i16(<2 x i16> %a0) {
 }
 
 define <vscale x 2 x i16> @vselect_add_const_signbit_nxv2i16(<vscale x 2 x i16> %a0) {
-; RV32-LABEL: vselect_add_const_signbit_nxv2i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 8
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vssubu.vx v8, v8, a0
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vselect_add_const_signbit_nxv2i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 8
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vssubu.vx v8, v8, a0
-; RV64-NEXT:    ret
+; CHECK-LABEL: vselect_add_const_signbit_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
   %cm1 = insertelement <vscale x 2 x i16> poison, i16 32766, i32 0
   %splatcm1 = shufflevector <vscale x 2 x i16> %cm1, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
   %nc = insertelement <vscale x 2 x i16> poison, i16 -32767, i32 0
@@ -318,3 +302,6 @@ declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)
 declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>)
 declare <vscale x 2 x i64> @llvm.umin.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
 declare <vscale x 2 x i64> @llvm.umax.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
index c1a8c657b4ff86b..d78d67d5e359871 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
@@ -394,71 +394,38 @@ define <vscale x 64 x i8> @ctlz_nxv64i8(<vscale x 64 x i8> %va) {
 declare <vscale x 64 x i8> @llvm.ctlz.nxv64i8(<vscale x 64 x i8>, i1)
 
 define <vscale x 1 x i16> @ctlz_nxv1i16(<vscale x 1 x i16> %va) {
-; RV32I-LABEL: ctlz_nxv1i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_nxv1i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv1i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_nxv1i16:
 ; CHECK-F:       # %bb.0:
@@ -493,71 +460,38 @@ define <vscale x 1 x i16> @ctlz_nxv1i16(<vscale x 1 x i16> %va) {
 declare <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16>, i1)
 
 define <vscale x 2 x i16> @ctlz_nxv2i16(<vscale x 2 x i16> %va) {
-; RV32I-LABEL: ctlz_nxv2i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_nxv2i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv2i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_nxv2i16:
 ; CHECK-F:       # %bb.0:
@@ -592,71 +526,38 @@ define <vscale x 2 x i16> @ctlz_nxv2i16(<vscale x 2 x i16> %va) {
 declare <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16>, i1)
 
 define <vscale x 4 x i16> @ctlz_nxv4i16(<vscale x 4 x i16> %va) {
-; RV32I-LABEL: ctlz_nxv4i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_nxv4i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv4i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_nxv4i16:
 ; CHECK-F:       # %bb.0:
@@ -691,71 +592,38 @@ define <vscale x 4 x i16> @ctlz_nxv4i16(<vscale x 4 x i16> %va) {
 declare <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16>, i1)
 
 define <vscale x 8 x i16> @ctlz_nxv8i16(<vscale x 8 x i16> %va) {
-; RV32I-LABEL: ctlz_nxv8i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v10, v10, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v10, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v10, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_nxv8i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v10, v10, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v10
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v10, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v10, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v10
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv8i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_nxv8i16:
 ; CHECK-F:       # %bb.0:
@@ -790,71 +658,38 @@ define <vscale x 8 x i16> @ctlz_nxv8i16(<vscale x 8 x i16> %va) {
 declare <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16>, i1)
 
 define <vscale x 16 x i16> @ctlz_nxv16i16(<vscale x 16 x i16> %va) {
-; RV32I-LABEL: ctlz_nxv16i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; RV32I-NEXT:    vsrl.vi v12, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v12
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v12, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v12, v12, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v12, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v12, v8
-; RV32I-NEXT:    vsrl.vi v12, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_nxv16i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; RV64I-NEXT:    vsrl.vi v12, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v12
-; RV64I-NEXT:    vsrl.vi v12, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v12
-; RV64I-NEXT:    vsrl.vi v12, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v12
-; RV64I-NEXT:    vsrl.vi v12, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v12
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v12, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v12, v12, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v12
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v12, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v12, v8
-; RV64I-NEXT:    vsrl.vi v12, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v12
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv16i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v12, v12, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v12, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_nxv16i16:
 ; CHECK-F:       # %bb.0:
@@ -889,71 +724,38 @@ define <vscale x 16 x i16> @ctlz_nxv16i16(<vscale x 16 x i16> %va) {
 declare <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16>, i1)
 
 define <vscale x 32 x i16> @ctlz_nxv32i16(<vscale x 32 x i16> %va) {
-; RV32-LABEL: ctlz_nxv32i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: ctlz_nxv32i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: ctlz_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: ctlz_nxv32i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -966,77 +768,41 @@ define <vscale x 32 x i16> @ctlz_nxv32i16(<vscale x 32 x i16> %va) {
 declare <vscale x 32 x i16> @llvm.ctlz.nxv32i16(<vscale x 32 x i16>, i1)
 
 define <vscale x 1 x i32> @ctlz_nxv1i32(<vscale x 1 x i32> %va) {
-; RV32I-LABEL: ctlz_nxv1i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 16
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_nxv1i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 16
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv1i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 16
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_nxv1i32:
 ; CHECK-F:       # %bb.0:
@@ -1074,77 +840,41 @@ define <vscale x 1 x i32> @ctlz_nxv1i32(<vscale x 1 x i32> %va) {
 declare <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32>, i1)
 
 define <vscale x 2 x i32> @ctlz_nxv2i32(<vscale x 2 x i32> %va) {
-; RV32I-LABEL: ctlz_nxv2i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 16
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_nxv2i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 16
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv2i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 16
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_nxv2i32:
 ; CHECK-F:       # %bb.0:
@@ -1182,77 +912,41 @@ define <vscale x 2 x i32> @ctlz_nxv2i32(<vscale x 2 x i32> %va) {
 declare <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32>, i1)
 
 define <vscale x 4 x i32> @ctlz_nxv4i32(<vscale x 4 x i32> %va) {
-; RV32I-LABEL: ctlz_nxv4i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 16
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v10, v10, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v10, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v10, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_nxv4i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 16
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v10, v10, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v10
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v10, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v10, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v10
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv4i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 16
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_nxv4i32:
 ; CHECK-F:       # %bb.0:
@@ -1290,77 +984,41 @@ define <vscale x 4 x i32> @ctlz_nxv4i32(<vscale x 4 x i32> %va) {
 declare <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32>, i1)
 
 define <vscale x 8 x i32> @ctlz_nxv8i32(<vscale x 8 x i32> %va) {
-; RV32I-LABEL: ctlz_nxv8i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; RV32I-NEXT:    vsrl.vi v12, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 16
-; RV32I-NEXT:    vor.vv v8, v8, v12
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v12, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v12, v12, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v12, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v12, v8
-; RV32I-NEXT:    vsrl.vi v12, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_nxv8i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; RV64I-NEXT:    vsrl.vi v12, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v12
-; RV64I-NEXT:    vsrl.vi v12, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v12
-; RV64I-NEXT:    vsrl.vi v12, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v12
-; RV64I-NEXT:    vsrl.vi v12, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v12
-; RV64I-NEXT:    vsrl.vi v12, v8, 16
-; RV64I-NEXT:    vor.vv v8, v8, v12
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v12, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v12, v12, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v12
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v12, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v12, v8
-; RV64I-NEXT:    vsrl.vi v12, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v12
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv8i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 16
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v12, v12, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v12, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_nxv8i32:
 ; CHECK-F:       # %bb.0:
@@ -1398,77 +1056,41 @@ define <vscale x 8 x i32> @ctlz_nxv8i32(<vscale x 8 x i32> %va) {
 declare <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32>, i1)
 
 define <vscale x 16 x i32> @ctlz_nxv16i32(<vscale x 16 x i32> %va) {
-; RV32I-LABEL: ctlz_nxv16i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; RV32I-NEXT:    vsrl.vi v16, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v16
-; RV32I-NEXT:    vsrl.vi v16, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v16
-; RV32I-NEXT:    vsrl.vi v16, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v16
-; RV32I-NEXT:    vsrl.vi v16, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v16
-; RV32I-NEXT:    vsrl.vi v16, v8, 16
-; RV32I-NEXT:    vor.vv v8, v8, v16
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v16, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v16, v16, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v16
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v16, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v16, v8
-; RV32I-NEXT:    vsrl.vi v16, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v16
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_nxv16i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; RV64I-NEXT:    vsrl.vi v16, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v16
-; RV64I-NEXT:    vsrl.vi v16, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v16
-; RV64I-NEXT:    vsrl.vi v16, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v16
-; RV64I-NEXT:    vsrl.vi v16, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v16
-; RV64I-NEXT:    vsrl.vi v16, v8, 16
-; RV64I-NEXT:    vor.vv v8, v8, v16
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v16, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v16, v16, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v16
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v16, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v16, v8
-; RV64I-NEXT:    vsrl.vi v16, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v16
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv16i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    vsrl.vi v16, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    vsrl.vi v16, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    vsrl.vi v16, v8, 16
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v16, v16, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v16, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v16, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_nxv16i32:
 ; CHECK-F:       # %bb.0:
@@ -2425,71 +2047,38 @@ define <vscale x 64 x i8> @ctlz_zero_undef_nxv64i8(<vscale x 64 x i8> %va) {
 }
 
 define <vscale x 1 x i16> @ctlz_zero_undef_nxv1i16(<vscale x 1 x i16> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv1i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv1i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv1i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_zero_undef_nxv1i16:
 ; CHECK-F:       # %bb.0:
@@ -2519,71 +2108,38 @@ define <vscale x 1 x i16> @ctlz_zero_undef_nxv1i16(<vscale x 1 x i16> %va) {
 }
 
 define <vscale x 2 x i16> @ctlz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv2i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv2i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv2i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_zero_undef_nxv2i16:
 ; CHECK-F:       # %bb.0:
@@ -2613,71 +2169,38 @@ define <vscale x 2 x i16> @ctlz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) {
 }
 
 define <vscale x 4 x i16> @ctlz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv4i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv4i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv4i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_zero_undef_nxv4i16:
 ; CHECK-F:       # %bb.0:
@@ -2707,71 +2230,38 @@ define <vscale x 4 x i16> @ctlz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) {
 }
 
 define <vscale x 8 x i16> @ctlz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv8i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v10, v10, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v10, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v10, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv8i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v10, v10, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v10
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v10, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v10, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v10
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv8i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_zero_undef_nxv8i16:
 ; CHECK-F:       # %bb.0:
@@ -2801,71 +2291,38 @@ define <vscale x 8 x i16> @ctlz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) {
 }
 
 define <vscale x 16 x i16> @ctlz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv16i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; RV32I-NEXT:    vsrl.vi v12, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v12
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v12, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v12, v12, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v12, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v12, v8
-; RV32I-NEXT:    vsrl.vi v12, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv16i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; RV64I-NEXT:    vsrl.vi v12, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v12
-; RV64I-NEXT:    vsrl.vi v12, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v12
-; RV64I-NEXT:    vsrl.vi v12, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v12
-; RV64I-NEXT:    vsrl.vi v12, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v12
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v12, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v12, v12, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v12
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v12, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v12, v8
-; RV64I-NEXT:    vsrl.vi v12, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v12
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv16i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v12, v12, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v12, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_zero_undef_nxv16i16:
 ; CHECK-F:       # %bb.0:
@@ -2895,153 +2352,84 @@ define <vscale x 16 x i16> @ctlz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) {
 }
 
 define <vscale x 32 x i16> @ctlz_zero_undef_nxv32i16(<vscale x 32 x i16> %va) {
-; RV32-LABEL: ctlz_zero_undef_nxv32i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: ctlz_zero_undef_nxv32i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
-;
-; CHECK-ZVBB-LABEL: ctlz_zero_undef_nxv32i16:
-; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-LABEL: ctlz_zero_undef_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: ctlz_zero_undef_nxv32i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
 ; CHECK-ZVBB-NEXT:    ret
   %a = call <vscale x 32 x i16> @llvm.ctlz.nxv32i16(<vscale x 32 x i16> %va, i1 true)
   ret <vscale x 32 x i16> %a
 }
 
 define <vscale x 1 x i32> @ctlz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv1i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 16
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv1i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 16
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv1i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 16
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_zero_undef_nxv1i32:
 ; CHECK-F:       # %bb.0:
@@ -3074,77 +2462,41 @@ define <vscale x 1 x i32> @ctlz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
 }
 
 define <vscale x 2 x i32> @ctlz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv2i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 16
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv2i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 16
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv2i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 16
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_zero_undef_nxv2i32:
 ; CHECK-F:       # %bb.0:
@@ -3177,77 +2529,41 @@ define <vscale x 2 x i32> @ctlz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
 }
 
 define <vscale x 4 x i32> @ctlz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv4i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 16
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v10, v10, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v10, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v10, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv4i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 16
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v10, v10, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v10
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v10, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v10, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v10
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv4i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 16
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_zero_undef_nxv4i32:
 ; CHECK-F:       # %bb.0:
@@ -3280,77 +2596,41 @@ define <vscale x 4 x i32> @ctlz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
 }
 
 define <vscale x 8 x i32> @ctlz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv8i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; RV32I-NEXT:    vsrl.vi v12, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 16
-; RV32I-NEXT:    vor.vv v8, v8, v12
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v12, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v12, v12, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v12, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v12, v8
-; RV32I-NEXT:    vsrl.vi v12, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv8i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; RV64I-NEXT:    vsrl.vi v12, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v12
-; RV64I-NEXT:    vsrl.vi v12, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v12
-; RV64I-NEXT:    vsrl.vi v12, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v12
-; RV64I-NEXT:    vsrl.vi v12, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v12
-; RV64I-NEXT:    vsrl.vi v12, v8, 16
-; RV64I-NEXT:    vor.vv v8, v8, v12
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v12, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v12, v12, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v12
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v12, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v12, v8
-; RV64I-NEXT:    vsrl.vi v12, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v12
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv8i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 16
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v12, v12, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v12, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_zero_undef_nxv8i32:
 ; CHECK-F:       # %bb.0:
@@ -3383,77 +2663,41 @@ define <vscale x 8 x i32> @ctlz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
 }
 
 define <vscale x 16 x i32> @ctlz_zero_undef_nxv16i32(<vscale x 16 x i32> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv16i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; RV32I-NEXT:    vsrl.vi v16, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v16
-; RV32I-NEXT:    vsrl.vi v16, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v16
-; RV32I-NEXT:    vsrl.vi v16, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v16
-; RV32I-NEXT:    vsrl.vi v16, v8, 8
-; RV32I-NEXT:    vor.vv v8, v8, v16
-; RV32I-NEXT:    vsrl.vi v16, v8, 16
-; RV32I-NEXT:    vor.vv v8, v8, v16
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v16, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v16, v16, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v16
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v16, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v16, v8
-; RV32I-NEXT:    vsrl.vi v16, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v16
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv16i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; RV64I-NEXT:    vsrl.vi v16, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v16
-; RV64I-NEXT:    vsrl.vi v16, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v16
-; RV64I-NEXT:    vsrl.vi v16, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v16
-; RV64I-NEXT:    vsrl.vi v16, v8, 8
-; RV64I-NEXT:    vor.vv v8, v8, v16
-; RV64I-NEXT:    vsrl.vi v16, v8, 16
-; RV64I-NEXT:    vor.vv v8, v8, v16
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v16, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v16, v16, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v16
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v16, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v16, v8
-; RV64I-NEXT:    vsrl.vi v16, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v16
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv16i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-ZVE64X-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    vsrl.vi v16, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    vsrl.vi v16, v8, 8
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    vsrl.vi v16, v8, 16
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v16, v16, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v16, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v16, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: ctlz_zero_undef_nxv16i32:
 ; CHECK-F:       # %bb.0:
@@ -4025,3 +3269,6 @@ define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
   %a = call <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64> %va, i1 true)
   ret <vscale x 8 x i64> %a
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
index bb0ff1c2bf09b0a..f479937ba2fc727 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
@@ -675,71 +675,38 @@ define <vscale x 16 x i16> @vp_ctlz_nxv16i16_unmasked(<vscale x 16 x i16> %va, i
 declare <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
 
 define <vscale x 32 x i16> @vp_ctlz_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv32i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_nxv32i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv32i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -751,71 +718,38 @@ define <vscale x 32 x i16> @vp_ctlz_nxv32i16(<vscale x 32 x i16> %va, <vscale x
 }
 
 define <vscale x 32 x i16> @vp_ctlz_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv32i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_nxv32i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_nxv32i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv32i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -2071,71 +2005,38 @@ define <vscale x 16 x i16> @vp_ctlz_zero_undef_nxv16i16_unmasked(<vscale x 16 x
 
 
 define <vscale x 32 x i16> @vp_ctlz_zero_undef_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv32i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv32i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv32i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -2147,71 +2048,38 @@ define <vscale x 32 x i16> @vp_ctlz_zero_undef_nxv32i16(<vscale x 32 x i16> %va,
 }
 
 define <vscale x 32 x i16> @vp_ctlz_zero_undef_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv32i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv32i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv32i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv32i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -2862,3 +2730,6 @@ define <vscale x 1 x i9> @vp_ctlz_zero_undef_nxv1i9(<vscale x 1 x i9> %va, <vsca
   %v = call <vscale x 1 x i9> @llvm.vp.ctlz.nxv1i9(<vscale x 1 x i9> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i9> %v
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll
index ef0a293ad5fb9ca..1a2b2640ca4fe4c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll
@@ -201,53 +201,29 @@ define <vscale x 64 x i8> @ctpop_nxv64i8(<vscale x 64 x i8> %va) {
 declare <vscale x 64 x i8> @llvm.ctpop.nxv64i8(<vscale x 64 x i8>)
 
 define <vscale x 1 x i16> @ctpop_nxv1i16(<vscale x 1 x i16> %va) {
-; RV32-LABEL: ctpop_nxv1i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: ctpop_nxv1i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: ctpop_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: ctpop_nxv1i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -260,53 +236,29 @@ define <vscale x 1 x i16> @ctpop_nxv1i16(<vscale x 1 x i16> %va) {
 declare <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16>)
 
 define <vscale x 2 x i16> @ctpop_nxv2i16(<vscale x 2 x i16> %va) {
-; RV32-LABEL: ctpop_nxv2i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: ctpop_nxv2i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: ctpop_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: ctpop_nxv2i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -319,53 +271,29 @@ define <vscale x 2 x i16> @ctpop_nxv2i16(<vscale x 2 x i16> %va) {
 declare <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16>)
 
 define <vscale x 4 x i16> @ctpop_nxv4i16(<vscale x 4 x i16> %va) {
-; RV32-LABEL: ctpop_nxv4i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: ctpop_nxv4i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: ctpop_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: ctpop_nxv4i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -378,53 +306,29 @@ define <vscale x 4 x i16> @ctpop_nxv4i16(<vscale x 4 x i16> %va) {
 declare <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16>)
 
 define <vscale x 8 x i16> @ctpop_nxv8i16(<vscale x 8 x i16> %va) {
-; RV32-LABEL: ctpop_nxv8i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: ctpop_nxv8i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: ctpop_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: ctpop_nxv8i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -437,53 +341,29 @@ define <vscale x 8 x i16> @ctpop_nxv8i16(<vscale x 8 x i16> %va) {
 declare <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16>)
 
 define <vscale x 16 x i16> @ctpop_nxv16i16(<vscale x 16 x i16> %va) {
-; RV32-LABEL: ctpop_nxv16i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: ctpop_nxv16i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsub.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: ctpop_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: ctpop_nxv16i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -496,53 +376,29 @@ define <vscale x 16 x i16> @ctpop_nxv16i16(<vscale x 16 x i16> %va) {
 declare <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16>)
 
 define <vscale x 32 x i16> @ctpop_nxv32i16(<vscale x 32 x i16> %va) {
-; RV32-LABEL: ctpop_nxv32i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: ctpop_nxv32i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: ctpop_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: ctpop_nxv32i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -555,55 +411,30 @@ define <vscale x 32 x i16> @ctpop_nxv32i16(<vscale x 32 x i16> %va) {
 declare <vscale x 32 x i16> @llvm.ctpop.nxv32i16(<vscale x 32 x i16>)
 
 define <vscale x 1 x i32> @ctpop_nxv1i32(<vscale x 1 x i32> %va) {
-; RV32-LABEL: ctpop_nxv1i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: ctpop_nxv1i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: ctpop_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: ctpop_nxv1i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -616,55 +447,30 @@ define <vscale x 1 x i32> @ctpop_nxv1i32(<vscale x 1 x i32> %va) {
 declare <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32>)
 
 define <vscale x 2 x i32> @ctpop_nxv2i32(<vscale x 2 x i32> %va) {
-; RV32-LABEL: ctpop_nxv2i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: ctpop_nxv2i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: ctpop_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: ctpop_nxv2i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -677,55 +483,30 @@ define <vscale x 2 x i32> @ctpop_nxv2i32(<vscale x 2 x i32> %va) {
 declare <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32>)
 
 define <vscale x 4 x i32> @ctpop_nxv4i32(<vscale x 4 x i32> %va) {
-; RV32-LABEL: ctpop_nxv4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: ctpop_nxv4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: ctpop_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: ctpop_nxv4i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -738,55 +519,30 @@ define <vscale x 4 x i32> @ctpop_nxv4i32(<vscale x 4 x i32> %va) {
 declare <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32>)
 
 define <vscale x 8 x i32> @ctpop_nxv8i32(<vscale x 8 x i32> %va) {
-; RV32-LABEL: ctpop_nxv8i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: ctpop_nxv8i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsub.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: ctpop_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: ctpop_nxv8i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -799,55 +555,30 @@ define <vscale x 8 x i32> @ctpop_nxv8i32(<vscale x 8 x i32> %va) {
 declare <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32>)
 
 define <vscale x 16 x i32> @ctpop_nxv16i32(<vscale x 16 x i32> %va) {
-; RV32-LABEL: ctpop_nxv16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: ctpop_nxv16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: ctpop_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: ctpop_nxv16i32:
 ; CHECK-ZVBB:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
index 1c003a33c54bf8b..9767ba4bbc3b7a6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
@@ -417,53 +417,29 @@ define <vscale x 64 x i8> @vp_ctpop_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32
 declare <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x i16> @vp_ctpop_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv1i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv1i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -475,53 +451,29 @@ define <vscale x 1 x i16> @vp_ctpop_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1
 }
 
 define <vscale x 1 x i16> @vp_ctpop_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv1i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv1i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -537,53 +489,29 @@ define <vscale x 1 x i16> @vp_ctpop_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32
 declare <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
 
 define <vscale x 2 x i16> @vp_ctpop_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv2i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv2i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -595,53 +523,29 @@ define <vscale x 2 x i16> @vp_ctpop_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2
 }
 
 define <vscale x 2 x i16> @vp_ctpop_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv2i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv2i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -657,53 +561,29 @@ define <vscale x 2 x i16> @vp_ctpop_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32
 declare <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
 
 define <vscale x 4 x i16> @vp_ctpop_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv4i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv4i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -715,53 +595,29 @@ define <vscale x 4 x i16> @vp_ctpop_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4
 }
 
 define <vscale x 4 x i16> @vp_ctpop_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv4i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv4i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -777,53 +633,29 @@ define <vscale x 4 x i16> @vp_ctpop_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32
 declare <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x i16> @vp_ctpop_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv8i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv8i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -835,53 +667,29 @@ define <vscale x 8 x i16> @vp_ctpop_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8
 }
 
 define <vscale x 8 x i16> @vp_ctpop_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv8i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv8i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -897,53 +705,29 @@ define <vscale x 8 x i16> @vp_ctpop_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32
 declare <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
 
 define <vscale x 16 x i16> @vp_ctpop_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv16i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv16i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -955,53 +739,29 @@ define <vscale x 16 x i16> @vp_ctpop_nxv16i16(<vscale x 16 x i16> %va, <vscale x
 }
 
 define <vscale x 16 x i16> @vp_ctpop_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv16i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv16i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsub.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1017,53 +777,29 @@ define <vscale x 16 x i16> @vp_ctpop_nxv16i16_unmasked(<vscale x 16 x i16> %va,
 declare <vscale x 32 x i16> @llvm.vp.ctpop.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i1>, i32)
 
 define <vscale x 32 x i16> @vp_ctpop_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv32i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv32i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1075,53 +811,29 @@ define <vscale x 32 x i16> @vp_ctpop_nxv32i16(<vscale x 32 x i16> %va, <vscale x
 }
 
 define <vscale x 32 x i16> @vp_ctpop_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv32i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv32i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv32i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1137,55 +849,30 @@ define <vscale x 32 x i16> @vp_ctpop_nxv32i16_unmasked(<vscale x 32 x i16> %va,
 declare <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x i32> @vp_ctpop_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv1i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv1i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1197,55 +884,30 @@ define <vscale x 1 x i32> @vp_ctpop_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1
 }
 
 define <vscale x 1 x i32> @vp_ctpop_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv1i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv1i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv1i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i32_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1261,55 +923,30 @@ define <vscale x 1 x i32> @vp_ctpop_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32
 declare <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
 
 define <vscale x 2 x i32> @vp_ctpop_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv2i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv2i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1321,55 +958,30 @@ define <vscale x 2 x i32> @vp_ctpop_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2
 }
 
 define <vscale x 2 x i32> @vp_ctpop_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv2i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv2i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i32_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1385,55 +997,30 @@ define <vscale x 2 x i32> @vp_ctpop_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32
 declare <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
 
 define <vscale x 4 x i32> @vp_ctpop_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1445,55 +1032,30 @@ define <vscale x 4 x i32> @vp_ctpop_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4
 }
 
 define <vscale x 4 x i32> @vp_ctpop_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv4i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv4i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i32_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1509,55 +1071,30 @@ define <vscale x 4 x i32> @vp_ctpop_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32
 declare <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x i32> @vp_ctpop_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv8i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv8i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1569,55 +1106,30 @@ define <vscale x 8 x i32> @vp_ctpop_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8
 }
 
 define <vscale x 8 x i32> @vp_ctpop_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv8i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv8i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsub.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i32_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1633,55 +1145,30 @@ define <vscale x 8 x i32> @vp_ctpop_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32
 declare <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
 
 define <vscale x 16 x i32> @vp_ctpop_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1693,55 +1180,30 @@ define <vscale x 16 x i32> @vp_ctpop_nxv16i32(<vscale x 16 x i32> %va, <vscale x
 }
 
 define <vscale x 16 x i32> @vp_ctpop_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv16i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv16i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i32_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -3133,59 +2595,32 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64_unmasked(<vscale x 16 x i64> %va,
 declare <vscale x 1 x i9> @llvm.vp.ctpop.nxv1i9(<vscale x 1 x i9>, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x i9> @vp_ctpop_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv1i9:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 511
-; RV32-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vand.vx v8, v8, a1
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_nxv1i9:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 511
-; RV64-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_nxv1i9:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 511
+; CHECK-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i9:
 ; CHECK-ZVBB:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
index 3fcbfa8b142a65e..fb4735240904682 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -403,61 +403,33 @@ define <vscale x 64 x i8> @cttz_nxv64i8(<vscale x 64 x i8> %va) {
 declare <vscale x 64 x i8> @llvm.cttz.nxv64i8(<vscale x 64 x i8>, i1)
 
 define <vscale x 1 x i16> @cttz_nxv1i16(<vscale x 1 x i16> %va) {
-; RV32I-LABEL: cttz_nxv1i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_nxv1i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV64I-NEXT:    vsub.vx v9, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_nxv1i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_nxv1i16:
 ; CHECK-F:       # %bb.0:
@@ -498,61 +470,33 @@ define <vscale x 1 x i16> @cttz_nxv1i16(<vscale x 1 x i16> %va) {
 declare <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16>, i1)
 
 define <vscale x 2 x i16> @cttz_nxv2i16(<vscale x 2 x i16> %va) {
-; RV32I-LABEL: cttz_nxv2i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_nxv2i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV64I-NEXT:    vsub.vx v9, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_nxv2i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_nxv2i16:
 ; CHECK-F:       # %bb.0:
@@ -593,61 +537,33 @@ define <vscale x 2 x i16> @cttz_nxv2i16(<vscale x 2 x i16> %va) {
 declare <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16>, i1)
 
 define <vscale x 4 x i16> @cttz_nxv4i16(<vscale x 4 x i16> %va) {
-; RV32I-LABEL: cttz_nxv4i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_nxv4i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV64I-NEXT:    vsub.vx v9, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_nxv4i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_nxv4i16:
 ; CHECK-F:       # %bb.0:
@@ -688,61 +604,33 @@ define <vscale x 4 x i16> @cttz_nxv4i16(<vscale x 4 x i16> %va) {
 declare <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16>, i1)
 
 define <vscale x 8 x i16> @cttz_nxv8i16(<vscale x 8 x i16> %va) {
-; RV32I-LABEL: cttz_nxv8i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; RV32I-NEXT:    vsub.vx v10, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v10, v10, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v10, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v10, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_nxv8i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; RV64I-NEXT:    vsub.vx v10, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v10, v10, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v10
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v10, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v10, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v10
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_nxv8i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_nxv8i16:
 ; CHECK-F:       # %bb.0:
@@ -783,61 +671,33 @@ define <vscale x 8 x i16> @cttz_nxv8i16(<vscale x 8 x i16> %va) {
 declare <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16>, i1)
 
 define <vscale x 16 x i16> @cttz_nxv16i16(<vscale x 16 x i16> %va) {
-; RV32I-LABEL: cttz_nxv16i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; RV32I-NEXT:    vsub.vx v12, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v12, v12, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v12, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v12, v8
-; RV32I-NEXT:    vsrl.vi v12, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_nxv16i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; RV64I-NEXT:    vsub.vx v12, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v12
-; RV64I-NEXT:    vsrl.vi v12, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v12, v12, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v12
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v12, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v12, v8
-; RV64I-NEXT:    vsrl.vi v12, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v12
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_nxv16i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v12, v12, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v12, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_nxv16i16:
 ; CHECK-F:       # %bb.0:
@@ -878,130 +738,73 @@ define <vscale x 16 x i16> @cttz_nxv16i16(<vscale x 16 x i16> %va) {
 declare <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16>, i1)
 
 define <vscale x 32 x i16> @cttz_nxv32i16(<vscale x 32 x i16> %va) {
-; RV32-LABEL: cttz_nxv32i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a0
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: cttz_nxv32i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a0
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
-;
-; CHECK-ZVBB-LABEL: cttz_nxv32i16:
-; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-ZVBB-NEXT:    vctz.v v8, v8
-; CHECK-ZVBB-NEXT:    ret
-  %a = call <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 false)
-  ret <vscale x 32 x i16> %a
+; CHECK-LABEL: cttz_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vsub.vx v16, v8, a0
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: cttz_nxv32i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
+  %a = call <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 false)
+  ret <vscale x 32 x i16> %a
 }
 declare <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16>, i1)
 
 define <vscale x 1 x i32> @cttz_nxv1i32(<vscale x 1 x i32> %va) {
-; RV32I-LABEL: cttz_nxv1i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_nxv1i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; RV64I-NEXT:    vsub.vx v9, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_nxv1i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_nxv1i32:
 ; CHECK-F:       # %bb.0:
@@ -1045,63 +848,34 @@ define <vscale x 1 x i32> @cttz_nxv1i32(<vscale x 1 x i32> %va) {
 declare <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32>, i1)
 
 define <vscale x 2 x i32> @cttz_nxv2i32(<vscale x 2 x i32> %va) {
-; RV32I-LABEL: cttz_nxv2i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_nxv2i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV64I-NEXT:    vsub.vx v9, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_nxv2i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_nxv2i32:
 ; CHECK-F:       # %bb.0:
@@ -1145,63 +919,34 @@ define <vscale x 2 x i32> @cttz_nxv2i32(<vscale x 2 x i32> %va) {
 declare <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32>, i1)
 
 define <vscale x 4 x i32> @cttz_nxv4i32(<vscale x 4 x i32> %va) {
-; RV32I-LABEL: cttz_nxv4i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV32I-NEXT:    vsub.vx v10, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v10, v10, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v10, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v10, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_nxv4i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV64I-NEXT:    vsub.vx v10, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v10, v10, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v10
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v10, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v10, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v10
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_nxv4i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_nxv4i32:
 ; CHECK-F:       # %bb.0:
@@ -1245,63 +990,34 @@ define <vscale x 4 x i32> @cttz_nxv4i32(<vscale x 4 x i32> %va) {
 declare <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32>, i1)
 
 define <vscale x 8 x i32> @cttz_nxv8i32(<vscale x 8 x i32> %va) {
-; RV32I-LABEL: cttz_nxv8i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32I-NEXT:    vsub.vx v12, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v12, v12, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v12, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v12, v8
-; RV32I-NEXT:    vsrl.vi v12, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_nxv8i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV64I-NEXT:    vsub.vx v12, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v12
-; RV64I-NEXT:    vsrl.vi v12, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v12, v12, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v12
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v12, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v12, v8
-; RV64I-NEXT:    vsrl.vi v12, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v12
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_nxv8i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v12, v12, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v12, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_nxv8i32:
 ; CHECK-F:       # %bb.0:
@@ -1345,63 +1061,34 @@ define <vscale x 8 x i32> @cttz_nxv8i32(<vscale x 8 x i32> %va) {
 declare <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32>, i1)
 
 define <vscale x 16 x i32> @cttz_nxv16i32(<vscale x 16 x i32> %va) {
-; RV32I-LABEL: cttz_nxv16i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32I-NEXT:    vsub.vx v16, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v16
-; RV32I-NEXT:    vsrl.vi v16, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v16, v16, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v16
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v16, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v16, v8
-; RV32I-NEXT:    vsrl.vi v16, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v16
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_nxv16i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV64I-NEXT:    vsub.vx v16, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v16
-; RV64I-NEXT:    vsrl.vi v16, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v16, v16, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v16
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v16, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v16, v8
-; RV64I-NEXT:    vsrl.vi v16, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v16
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_nxv16i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v16, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v16, v16, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v16, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v16, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_nxv16i32:
 ; CHECK-F:       # %bb.0:
@@ -2465,61 +2152,33 @@ define <vscale x 64 x i8> @cttz_zero_undef_nxv64i8(<vscale x 64 x i8> %va) {
 }
 
 define <vscale x 1 x i16> @cttz_zero_undef_nxv1i16(<vscale x 1 x i16> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv1i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv1i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV64I-NEXT:    vsub.vx v9, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv1i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_zero_undef_nxv1i16:
 ; CHECK-F:       # %bb.0:
@@ -2527,87 +2186,59 @@ define <vscale x 1 x i16> @cttz_zero_undef_nxv1i16(<vscale x 1 x i16> %va) {
 ; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
 ; CHECK-F-NEXT:    vand.vv v8, v8, v9
 ; CHECK-F-NEXT:    vfwcvt.f.xu.v v9, v8
-; CHECK-F-NEXT:    vnsrl.wi v8, v9, 23
-; CHECK-F-NEXT:    li a0, 127
-; CHECK-F-NEXT:    vsub.vx v8, v8, a0
-; CHECK-F-NEXT:    ret
-;
-; CHECK-D-LABEL: cttz_zero_undef_nxv1i16:
-; CHECK-D:       # %bb.0:
-; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
-; CHECK-D-NEXT:    vand.vv v8, v8, v9
-; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
-; CHECK-D-NEXT:    vnsrl.wi v8, v9, 23
-; CHECK-D-NEXT:    li a0, 127
-; CHECK-D-NEXT:    vsub.vx v8, v8, a0
-; CHECK-D-NEXT:    ret
-;
-; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv1i16:
-; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-ZVBB-NEXT:    vctz.v v8, v8
-; CHECK-ZVBB-NEXT:    ret
-  %a = call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 true)
-  ret <vscale x 1 x i16> %a
-}
-
-define <vscale x 2 x i16> @cttz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv2i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv2i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV64I-NEXT:    vsub.vx v9, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-F-NEXT:    vnsrl.wi v8, v9, 23
+; CHECK-F-NEXT:    li a0, 127
+; CHECK-F-NEXT:    vsub.vx v8, v8, a0
+; CHECK-F-NEXT:    ret
+;
+; CHECK-D-LABEL: cttz_zero_undef_nxv1i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-D-NEXT:    vand.vv v8, v8, v9
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT:    vnsrl.wi v8, v9, 23
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv1i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
+  %a = call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 true)
+  ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 2 x i16> @cttz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) {
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv2i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_zero_undef_nxv2i16:
 ; CHECK-F:       # %bb.0:
@@ -2641,61 +2272,33 @@ define <vscale x 2 x i16> @cttz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) {
 }
 
 define <vscale x 4 x i16> @cttz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv4i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv4i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV64I-NEXT:    vsub.vx v9, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv4i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_zero_undef_nxv4i16:
 ; CHECK-F:       # %bb.0:
@@ -2729,61 +2332,33 @@ define <vscale x 4 x i16> @cttz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) {
 }
 
 define <vscale x 8 x i16> @cttz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv8i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; RV32I-NEXT:    vsub.vx v10, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v10, v10, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v10, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v10, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv8i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; RV64I-NEXT:    vsub.vx v10, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v10, v10, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v10
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v10, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v10, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v10
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv8i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_zero_undef_nxv8i16:
 ; CHECK-F:       # %bb.0:
@@ -2817,61 +2392,33 @@ define <vscale x 8 x i16> @cttz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) {
 }
 
 define <vscale x 16 x i16> @cttz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv16i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; RV32I-NEXT:    vsub.vx v12, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 1
-; RV32I-NEXT:    lui a0, 5
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v12, v12, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 3
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v12, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v12, v8
-; RV32I-NEXT:    vsrl.vi v12, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    li a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 8
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv16i16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; RV64I-NEXT:    vsub.vx v12, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v12
-; RV64I-NEXT:    vsrl.vi v12, v8, 1
-; RV64I-NEXT:    lui a0, 5
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v12, v12, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v12
-; RV64I-NEXT:    lui a0, 3
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v12, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v12, v8
-; RV64I-NEXT:    vsrl.vi v12, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v12
-; RV64I-NEXT:    lui a0, 1
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    li a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 8
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv16i16:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 5
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v12, v12, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    lui a0, 3
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v12, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    lui a0, 1
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    li a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_zero_undef_nxv16i16:
 ; CHECK-F:       # %bb.0:
@@ -2905,129 +2452,72 @@ define <vscale x 16 x i16> @cttz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) {
 }
 
 define <vscale x 32 x i16> @cttz_zero_undef_nxv32i16(<vscale x 32 x i16> %va) {
-; RV32-LABEL: cttz_zero_undef_nxv32i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a0
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: cttz_zero_undef_nxv32i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a0
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: cttz_zero_undef_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vsub.vx v16, v8, a0
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv32i16:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-ZVBB-NEXT:    vctz.v v8, v8
-; CHECK-ZVBB-NEXT:    ret
-  %a = call <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 true)
-  ret <vscale x 32 x i16> %a
-}
-
-define <vscale x 1 x i32> @cttz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv1i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv1i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; RV64I-NEXT:    vsub.vx v9, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
+  %a = call <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 true)
+  ret <vscale x 32 x i16> %a
+}
+
+define <vscale x 1 x i32> @cttz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv1i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_zero_undef_nxv1i32:
 ; CHECK-F:       # %bb.0:
@@ -3064,63 +2554,34 @@ define <vscale x 1 x i32> @cttz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
 }
 
 define <vscale x 2 x i32> @cttz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv2i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv2i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV64I-NEXT:    vsub.vx v9, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv2i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_zero_undef_nxv2i32:
 ; CHECK-F:       # %bb.0:
@@ -3157,63 +2618,34 @@ define <vscale x 2 x i32> @cttz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
 }
 
 define <vscale x 4 x i32> @cttz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv4i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV32I-NEXT:    vsub.vx v10, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v10, v10, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v10, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v10, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv4i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV64I-NEXT:    vsub.vx v10, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v10, v10, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v10
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v10, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v10, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v10
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv4i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_zero_undef_nxv4i32:
 ; CHECK-F:       # %bb.0:
@@ -3250,63 +2682,34 @@ define <vscale x 4 x i32> @cttz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
 }
 
 define <vscale x 8 x i32> @cttz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv8i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32I-NEXT:    vsub.vx v12, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v12, v12, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v12, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v12, v8
-; RV32I-NEXT:    vsrl.vi v12, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv8i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV64I-NEXT:    vsub.vx v12, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v12
-; RV64I-NEXT:    vsrl.vi v12, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v12, v12, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v12
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v12, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v12, v8
-; RV64I-NEXT:    vsrl.vi v12, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v12
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv8i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v12, v12, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v12, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_zero_undef_nxv8i32:
 ; CHECK-F:       # %bb.0:
@@ -3343,63 +2746,34 @@ define <vscale x 8 x i32> @cttz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
 }
 
 define <vscale x 16 x i32> @cttz_zero_undef_nxv16i32(<vscale x 16 x i32> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv16i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32I-NEXT:    vsub.vx v16, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v16
-; RV32I-NEXT:    vsrl.vi v16, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vand.vx v16, v16, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v16
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vand.vx v16, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v16, v8
-; RV32I-NEXT:    vsrl.vi v16, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v16
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    lui a0, 4112
-; RV32I-NEXT:    addi a0, a0, 257
-; RV32I-NEXT:    vmul.vx v8, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 24
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv16i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV64I-NEXT:    vsub.vx v16, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v16
-; RV64I-NEXT:    vsrl.vi v16, v8, 1
-; RV64I-NEXT:    lui a0, 349525
-; RV64I-NEXT:    addiw a0, a0, 1365
-; RV64I-NEXT:    vand.vx v16, v16, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v16
-; RV64I-NEXT:    lui a0, 209715
-; RV64I-NEXT:    addiw a0, a0, 819
-; RV64I-NEXT:    vand.vx v16, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v16, v8
-; RV64I-NEXT:    vsrl.vi v16, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v16
-; RV64I-NEXT:    lui a0, 61681
-; RV64I-NEXT:    addiw a0, a0, -241
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    lui a0, 4112
-; RV64I-NEXT:    addiw a0, a0, 257
-; RV64I-NEXT:    vmul.vx v8, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 24
-; RV64I-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv16i32:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-ZVE64X-NEXT:    vsub.vx v16, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-ZVE64X-NEXT:    lui a0, 349525
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT:    vand.vx v16, v16, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    lui a0, 209715
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 819
+; CHECK-ZVE64X-NEXT:    vand.vx v16, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v16, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT:    lui a0, 61681
+; CHECK-ZVE64X-NEXT:    addi a0, a0, -241
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    lui a0, 4112
+; CHECK-ZVE64X-NEXT:    addi a0, a0, 257
+; CHECK-ZVE64X-NEXT:    vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT:    ret
 ;
 ; CHECK-F-LABEL: cttz_zero_undef_nxv16i32:
 ; CHECK-F:       # %bb.0:
@@ -3915,3 +3289,6 @@ define <vscale x 8 x i64> @cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
   %a = call <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true)
   ret <vscale x 8 x i64> %a
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
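
For reference, the vector sequences in the checks above are the usual branchless cttz: isolate the trailing-zero mask with (x - 1) & ~x, then popcount it with the classic SWAR constants. A minimal scalar C sketch of the e32 form follows; it is illustrative only (not part of the patch) and assumes a non-zero input, matching the zero-undef variants:

#include <stdint.h>

/* Scalar equivalent of the vector lowering checked above (e32 element case).
 * The lui/addi pairs in the checks materialize the same SWAR constants. */
static uint32_t cttz32_swar(uint32_t x) {
  x = (x - 1) & ~x;                                  /* trailing-zero mask: vsub.vx/vnot.v/vand.vv */
  x = x - ((x >> 1) & 0x55555555u);                  /* lui 349525 / addi 1365 */
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  /* lui 209715 / addi 819  */
  x = (x + (x >> 4)) & 0x0f0f0f0fu;                  /* lui 61681  / addi -241 */
  return (x * 0x01010101u) >> 24;                    /* lui 4112   / addi 257, vsrl.vi 24 */
}
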
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
index 1794b0cce7f86a0..1dda8aa458d9069 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
@@ -473,61 +473,33 @@ define <vscale x 64 x i8> @vp_cttz_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32
 declare <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x i16> @vp_cttz_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv1i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv1i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv1i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -539,61 +511,33 @@ define <vscale x 1 x i16> @vp_cttz_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x
 }
 
 define <vscale x 1 x i16> @vp_cttz_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv1i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv1i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv1i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -609,61 +553,33 @@ define <vscale x 1 x i16> @vp_cttz_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32
 declare <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
 
 define <vscale x 2 x i16> @vp_cttz_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv2i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv2i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv2i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -675,61 +591,33 @@ define <vscale x 2 x i16> @vp_cttz_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x
 }
 
 define <vscale x 2 x i16> @vp_cttz_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv2i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv2i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv2i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -745,61 +633,33 @@ define <vscale x 2 x i16> @vp_cttz_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32
 declare <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
 
 define <vscale x 4 x i16> @vp_cttz_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv4i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv4i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv4i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -811,61 +671,33 @@ define <vscale x 4 x i16> @vp_cttz_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x
 }
 
 define <vscale x 4 x i16> @vp_cttz_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv4i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv4i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv4i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -881,61 +713,33 @@ define <vscale x 4 x i16> @vp_cttz_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32
 declare <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x i16> @vp_cttz_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv8i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv8i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsub.vx v10, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv8i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -947,61 +751,33 @@ define <vscale x 8 x i16> @vp_cttz_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x
 }
 
 define <vscale x 8 x i16> @vp_cttz_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv8i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv8i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsub.vx v10, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv8i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1017,61 +793,33 @@ define <vscale x 8 x i16> @vp_cttz_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32
 declare <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
 
 define <vscale x 16 x i16> @vp_cttz_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv16i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; RV32-NEXT:    vsub.vx v12, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv16i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; RV64-NEXT:    vsub.vx v12, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vsub.vx v12, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv16i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1083,61 +831,33 @@ define <vscale x 16 x i16> @vp_cttz_nxv16i16(<vscale x 16 x i16> %va, <vscale x
 }
 
 define <vscale x 16 x i16> @vp_cttz_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv16i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; RV32-NEXT:    vsub.vx v12, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv16i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; RV64-NEXT:    vsub.vx v12, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsub.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vsub.vx v12, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v12
+; CHECK-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv16i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1153,61 +873,33 @@ define <vscale x 16 x i16> @vp_cttz_nxv16i16_unmasked(<vscale x 16 x i16> %va, i
 declare <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
 
 define <vscale x 32 x i16> @vp_cttz_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv32i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv32i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsub.vx v16, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv32i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1219,61 +911,33 @@ define <vscale x 32 x i16> @vp_cttz_nxv32i16(<vscale x 32 x i16> %va, <vscale x
 }
 
 define <vscale x 32 x i16> @vp_cttz_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv32i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv32i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv32i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsub.vx v16, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv32i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1289,63 +953,34 @@ define <vscale x 32 x i16> @vp_cttz_nxv32i16_unmasked(<vscale x 32 x i16> %va, i
 declare <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x i32> @vp_cttz_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv1i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv1i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv1i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1357,63 +992,34 @@ define <vscale x 1 x i32> @vp_cttz_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x
 }
 
 define <vscale x 1 x i32> @vp_cttz_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv1i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv1i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv1i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv1i32_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1429,63 +1035,34 @@ define <vscale x 1 x i32> @vp_cttz_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32
 declare <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
 
 define <vscale x 2 x i32> @vp_cttz_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv2i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv2i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv2i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1497,63 +1074,34 @@ define <vscale x 2 x i32> @vp_cttz_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x
 }
 
 define <vscale x 2 x i32> @vp_cttz_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv2i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv2i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv2i32_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1569,63 +1117,34 @@ define <vscale x 2 x i32> @vp_cttz_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32
 declare <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
 
 define <vscale x 4 x i32> @vp_cttz_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsub.vx v10, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv4i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1637,63 +1156,34 @@ define <vscale x 4 x i32> @vp_cttz_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x
 }
 
 define <vscale x 4 x i32> @vp_cttz_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv4i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv4i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsub.vx v10, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv4i32_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1709,63 +1199,34 @@ define <vscale x 4 x i32> @vp_cttz_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32
 declare <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x i32> @vp_cttz_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv8i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsub.vx v12, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv8i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsub.vx v12, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsub.vx v12, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv8i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1777,63 +1238,34 @@ define <vscale x 8 x i32> @vp_cttz_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x
 }
 
 define <vscale x 8 x i32> @vp_cttz_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv8i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsub.vx v12, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv8i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsub.vx v12, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsub.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsub.vx v12, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v12
+; CHECK-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv8i32_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1849,63 +1281,34 @@ define <vscale x 8 x i32> @vp_cttz_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32
 declare <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
 
 define <vscale x 16 x i32> @vp_cttz_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vsub.vx v16, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv16i32:
 ; CHECK-ZVBB:       # %bb.0:
@@ -1917,63 +1320,34 @@ define <vscale x 16 x i32> @vp_cttz_nxv16i32(<vscale x 16 x i32> %va, <vscale x
 }
 
 define <vscale x 16 x i32> @vp_cttz_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv16i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv16i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vsub.vx v16, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv16i32_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -4138,61 +3512,33 @@ define <vscale x 16 x i16> @vp_cttz_zero_undef_nxv16i16_unmasked(<vscale x 16 x
 
 
 define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv32i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv32i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_zero_undef_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsub.vx v16, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16:
 ; CHECK-ZVBB:       # %bb.0:
@@ -4204,61 +3550,33 @@ define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16(<vscale x 32 x i16> %va,
 }
 
 define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsub.vx v16, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v16
+; CHECK-NEXT:    vsrl.vi v16, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v16, v8
+; CHECK-NEXT:    vsrl.vi v16, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -4918,65 +4236,35 @@ define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64_unmasked(<vscale x 16 x
 ; Test promotion.
 declare <vscale x 1 x i9> @llvm.vp.cttz.nxv1i9(<vscale x 1 x i9>, i1 immarg, <vscale x 1 x i1>, i32)
 define <vscale x 1 x i9> @vp_cttz_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv1i9:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 512
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vor.vx v8, v8, a1, v0.t
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    vsub.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_nxv1i9:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 512
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vor.vx v8, v8, a1, v0.t
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    vsub.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_nxv1i9:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 512
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vor.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    vsub.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv1i9:
 ; CHECK-ZVBB:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
index f5e5b9e9083b8dc..34dcce3fe058bc9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
@@ -769,7 +769,7 @@ define i32 @extractelt_sdiv_nxv4i32_splat(<vscale x 4 x i32> %x) {
 ; RV64NOM-LABEL: extractelt_sdiv_nxv4i32_splat:
 ; RV64NOM:       # %bb.0:
 ; RV64NOM-NEXT:    lui a0, 349525
-; RV64NOM-NEXT:    addiw a0, a0, 1366
+; RV64NOM-NEXT:    addi a0, a0, 1366
 ; RV64NOM-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; RV64NOM-NEXT:    vmulh.vx v8, v8, a0
 ; RV64NOM-NEXT:    vsrl.vi v10, v8, 31
@@ -799,7 +799,7 @@ define i32 @extractelt_udiv_nxv4i32_splat(<vscale x 4 x i32> %x) {
 ; RV64NOM-LABEL: extractelt_udiv_nxv4i32_splat:
 ; RV64NOM:       # %bb.0:
 ; RV64NOM-NEXT:    lui a0, 349525
-; RV64NOM-NEXT:    addiw a0, a0, 1366
+; RV64NOM-NEXT:    addi a0, a0, 1366
 ; RV64NOM-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; RV64NOM-NEXT:    vmulh.vx v8, v8, a0
 ; RV64NOM-NEXT:    vsrl.vi v10, v8, 31
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
index cf8dfea197afae7..22f92fe48e22e71 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
@@ -231,125 +231,67 @@ define <16 x i8> @vp_bitreverse_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl)
 declare <2 x i16> @llvm.vp.bitreverse.v2i16(<2 x i16>, <2 x i1>, i32)
 
 define <2 x i16> @vp_bitreverse_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v2i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_v2i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
   %v = call <2 x i16> @llvm.vp.bitreverse.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl)
   ret <2 x i16> %v
 }
 
 define <2 x i16> @vp_bitreverse_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v2i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_v2i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_v2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    ret
   %head = insertelement <2 x i1> poison, i1 true, i32 0
   %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
   %v = call <2 x i16> @llvm.vp.bitreverse.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl)
@@ -359,125 +301,67 @@ define <2 x i16> @vp_bitreverse_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl)
 declare <4 x i16> @llvm.vp.bitreverse.v4i16(<4 x i16>, <4 x i1>, i32)
 
 define <4 x i16> @vp_bitreverse_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v4i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_v4i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
   %v = call <4 x i16> @llvm.vp.bitreverse.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i16> %v
 }
 
 define <4 x i16> @vp_bitreverse_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v4i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_v4i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_v4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    ret
   %head = insertelement <4 x i1> poison, i1 true, i32 0
   %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
   %v = call <4 x i16> @llvm.vp.bitreverse.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
@@ -487,125 +371,67 @@ define <4 x i16> @vp_bitreverse_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl)
 declare <8 x i16> @llvm.vp.bitreverse.v8i16(<8 x i16>, <8 x i1>, i32)
 
 define <8 x i16> @vp_bitreverse_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v8i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_v8i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
   %v = call <8 x i16> @llvm.vp.bitreverse.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl)
   ret <8 x i16> %v
 }
 
 define <8 x i16> @vp_bitreverse_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v8i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_v8i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_v8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i32 0
   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
   %v = call <8 x i16> @llvm.vp.bitreverse.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl)
@@ -615,125 +441,67 @@ define <8 x i16> @vp_bitreverse_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl)
 declare <16 x i16> @llvm.vp.bitreverse.v16i16(<16 x i16>, <16 x i1>, i32)
 
 define <16 x i16> @vp_bitreverse_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v16i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_v16i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    ret
   %v = call <16 x i16> @llvm.vp.bitreverse.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl)
   ret <16 x i16> %v
 }
 
 define <16 x i16> @vp_bitreverse_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v16i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 8
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 2
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_v16i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 8
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 2
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_v16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 2
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
   %v = call <16 x i16> @llvm.vp.bitreverse.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl)
@@ -743,157 +511,83 @@ define <16 x i16> @vp_bitreverse_v16i16_unmasked(<16 x i16> %va, i32 zeroext %ev
 declare <2 x i32> @llvm.vp.bitreverse.v2i32(<2 x i32>, <2 x i1>, i32)
 
 define <2 x i32> @vp_bitreverse_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v2i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v9, v9, v10, v0.t
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v10, v10, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_v2i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v9, v9, v10, v0.t
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v10, v10, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v10, v10, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
   %v = call <2 x i32> @llvm.vp.bitreverse.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
   ret <2 x i32> %v
 }
 
 define <2 x i32> @vp_bitreverse_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v2i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsrl.vi v10, v8, 24
-; RV32-NEXT:    vor.vv v9, v9, v10
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsll.vi v10, v10, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_v2i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsrl.vi v10, v8, 24
-; RV64-NEXT:    vor.vv v9, v9, v10
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsll.vi v10, v10, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_v2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsll.vi v10, v10, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    ret
   %head = insertelement <2 x i1> poison, i1 true, i32 0
   %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
   %v = call <2 x i32> @llvm.vp.bitreverse.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
@@ -903,317 +597,169 @@ define <2 x i32> @vp_bitreverse_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl)
 declare <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32>, <4 x i1>, i32)
 
 define <4 x i32> @vp_bitreverse_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v9, v9, v10, v0.t
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v10, v10, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_v4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v9, v9, v10, v0.t
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v10, v10, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v10, v10, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
   %v = call <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i32> %v
 }
 
 define <4 x i32> @vp_bitreverse_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v4i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsrl.vi v10, v8, 24
-; RV32-NEXT:    vor.vv v9, v9, v10
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsll.vi v10, v10, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_v4i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsrl.vi v10, v8, 24
-; RV64-NEXT:    vor.vv v9, v9, v10
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsll.vi v10, v10, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    ret
-  %head = insertelement <4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
-  %v = call <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
-  ret <4 x i32> %v
-}
-
-declare <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32>, <8 x i1>, i32)
-
-define <8 x i32> @vp_bitreverse_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v8i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v10, v10, v12, v0.t
-; RV32-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v12, v12, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_v8i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v10, v10, v12, v0.t
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v12, v12, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
-; RV64-NEXT:    ret
-  %v = call <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
-  ret <8 x i32> %v
-}
-
-define <8 x i32> @vp_bitreverse_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v8i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsrl.vi v12, v8, 24
-; RV32-NEXT:    vor.vv v10, v10, v12
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vsll.vi v12, v12, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 2
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_v8i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsrl.vi v12, v8, 24
-; RV64-NEXT:    vor.vv v10, v10, v12
-; RV64-NEXT:    vand.vx v12, v8, a0
-; RV64-NEXT:    vsll.vi v12, v12, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 2
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v10, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_v4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsll.vi v10, v10, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+declare <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32>, <8 x i1>, i32)
+
+define <8 x i32> @vp_bitreverse_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_bitreverse_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v10, v10, v12, v0.t
+; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v12, v12, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vp_bitreverse_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_bitreverse_v8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsrl.vi v12, v8, 24
+; CHECK-NEXT:    vor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    vsll.vi v12, v12, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 2
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i32 0
   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
   %v = call <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
@@ -1223,157 +769,83 @@ define <8 x i32> @vp_bitreverse_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl)
 declare <16 x i32> @llvm.vp.bitreverse.v16i32(<16 x i32>, <16 x i1>, i32)
 
 define <16 x i32> @vp_bitreverse_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v12, v12, v16, v0.t
-; RV32-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v16, v16, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v12, v8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_v16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v12, v12, v16, v0.t
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v16, v16, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 2, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 8, v0.t
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v12, v12, v16, v0.t
+; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v16, v16, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 2, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    ret
   %v = call <16 x i32> @llvm.vp.bitreverse.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
   ret <16 x i32> %v
 }
 
 define <16 x i32> @vp_bitreverse_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v16i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vsrl.vi v16, v8, 24
-; RV32-NEXT:    vor.vv v12, v12, v16
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsll.vi v16, v16, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 2
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v12, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_v16i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsrl.vi v16, v8, 24
-; RV64-NEXT:    vor.vv v12, v12, v16
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsll.vi v16, v16, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 2
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v12, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_v16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vsrl.vi v16, v8, 24
+; CHECK-NEXT:    vor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vsll.vi v16, v16, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 2
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v12, v8
+; CHECK-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
   %v = call <16 x i32> @llvm.vp.bitreverse.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
@@ -2945,295 +2417,152 @@ define <16 x i64> @vp_bitreverse_v16i64_unmasked(<16 x i64> %va, i32 zeroext %ev
 declare <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16>, <128 x i1>, i32)
 
 define <128 x i16> @vp_bitreverse_v128i16(<128 x i16> %va, <128 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v128i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 4
-; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 3
-; RV32-NEXT:    add a1, sp, a1
-; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; RV32-NEXT:    li a2, 64
-; RV32-NEXT:    vslidedown.vi v24, v0, 8
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    bltu a0, a2, .LBB34_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    li a1, 64
-; RV32-NEXT:  .LBB34_2:
-; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    lui a1, 1
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vand.vx v16, v16, a1, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a1, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT:    lui a2, 3
-; RV32-NEXT:    addi a2, a2, 819
-; RV32-NEXT:    vand.vx v16, v16, a2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a2, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    lui a3, 5
-; RV32-NEXT:    addi a3, a3, 1365
-; RV32-NEXT:    vand.vx v16, v16, a3, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a3, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV32-NEXT:    addi a4, sp, 16
-; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    addi a4, a0, -64
-; RV32-NEXT:    sltu a0, a0, a4
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    and a0, a0, a4
-; RV32-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT:    vmv1r.v v0, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vand.vx v16, v16, a1, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a1, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v16, v16, a2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a2, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    vand.vx v16, v16, a3, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a3, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v16, v16, v8, v0.t
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_v128i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    .cfi_def_cfa_offset 16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
-; RV64-NEXT:    sub sp, sp, a1
-; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV64-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; RV64-NEXT:    li a2, 64
-; RV64-NEXT:    vslidedown.vi v24, v0, 8
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    bltu a0, a2, .LBB34_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    li a1, 64
-; RV64-NEXT:  .LBB34_2:
-; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    lui a1, 1
-; RV64-NEXT:    addiw a1, a1, -241
-; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT:    lui a2, 3
-; RV64-NEXT:    addiw a2, a2, 819
-; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a3, 5
-; RV64-NEXT:    addiw a3, a3, 1365
-; RV64-NEXT:    vand.vx v16, v16, a3, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV64-NEXT:    addi a4, sp, 16
-; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
-; RV64-NEXT:    addi a4, a0, -64
-; RV64-NEXT:    sltu a0, a0, a4
-; RV64-NEXT:    addi a0, a0, -1
-; RV64-NEXT:    and a0, a0, a4
-; RV64-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT:    vmv1r.v v0, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 16
-; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a3, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v16, v16, v8, v0.t
-; RV64-NEXT:    addi a0, sp, 16
-; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    addi sp, sp, 16
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_v128i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
+; CHECK-NEXT:    li a2, 64
+; CHECK-NEXT:    vslidedown.vi v24, v0, 8
+; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:    bltu a0, a2, .LBB34_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    li a1, 64
+; CHECK-NEXT:  .LBB34_2:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v16, v8, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT:    lui a1, 1
+; CHECK-NEXT:    addi a1, a1, -241
+; CHECK-NEXT:    vand.vx v16, v16, a1, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; CHECK-NEXT:    lui a2, 3
+; CHECK-NEXT:    addi a2, a2, 819
+; CHECK-NEXT:    vand.vx v16, v16, a2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a2, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT:    lui a3, 5
+; CHECK-NEXT:    addi a3, a3, 1365
+; CHECK-NEXT:    vand.vx v16, v16, a3, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a3, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    addi a4, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a4, a0, -64
+; CHECK-NEXT:    sltu a0, a0, a4
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    and a0, a0, a4
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vsrl.vi v16, v8, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT:    vand.vx v16, v16, a1, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v16, v16, a2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a2, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT:    vand.vx v16, v16, a3, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a3, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v16, v16, v8, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
   %v = call <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16> %va, <128 x i1> %m, i32 %evl)
   ret <128 x i16> %v
 }
 
 define <128 x i16> @vp_bitreverse_v128i16_unmasked(<128 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v128i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a2, 64
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    bltu a0, a2, .LBB35_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    li a1, 64
-; RV32-NEXT:  .LBB35_2:
-; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v24, v8, 8
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vsrl.vi v24, v8, 4
-; RV32-NEXT:    lui a1, 1
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vand.vx v24, v24, a1
-; RV32-NEXT:    vand.vx v8, v8, a1
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 2
-; RV32-NEXT:    lui a2, 3
-; RV32-NEXT:    addi a2, a2, 819
-; RV32-NEXT:    vand.vx v24, v24, a2
-; RV32-NEXT:    vand.vx v8, v8, a2
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 1
-; RV32-NEXT:    lui a3, 5
-; RV32-NEXT:    addi a3, a3, 1365
-; RV32-NEXT:    vand.vx v24, v24, a3
-; RV32-NEXT:    vand.vx v8, v8, a3
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v24, v8
-; RV32-NEXT:    addi a4, a0, -64
-; RV32-NEXT:    sltu a0, a0, a4
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    and a0, a0, a4
-; RV32-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v24, v16, 8
-; RV32-NEXT:    vsll.vi v16, v16, 8
-; RV32-NEXT:    vor.vv v16, v16, v24
-; RV32-NEXT:    vsrl.vi v24, v16, 4
-; RV32-NEXT:    vand.vx v24, v24, a1
-; RV32-NEXT:    vand.vx v16, v16, a1
-; RV32-NEXT:    vsll.vi v16, v16, 4
-; RV32-NEXT:    vor.vv v16, v24, v16
-; RV32-NEXT:    vsrl.vi v24, v16, 2
-; RV32-NEXT:    vand.vx v24, v24, a2
-; RV32-NEXT:    vand.vx v16, v16, a2
-; RV32-NEXT:    vsll.vi v16, v16, 2
-; RV32-NEXT:    vor.vv v16, v24, v16
-; RV32-NEXT:    vsrl.vi v24, v16, 1
-; RV32-NEXT:    vand.vx v24, v24, a3
-; RV32-NEXT:    vand.vx v16, v16, a3
-; RV32-NEXT:    vadd.vv v16, v16, v16
-; RV32-NEXT:    vor.vv v16, v24, v16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bitreverse_v128i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a2, 64
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    bltu a0, a2, .LBB35_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    li a1, 64
-; RV64-NEXT:  .LBB35_2:
-; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v24, v8, 8
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v24
-; RV64-NEXT:    vsrl.vi v24, v8, 4
-; RV64-NEXT:    lui a1, 1
-; RV64-NEXT:    addiw a1, a1, -241
-; RV64-NEXT:    vand.vx v24, v24, a1
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v24, v8
-; RV64-NEXT:    vsrl.vi v24, v8, 2
-; RV64-NEXT:    lui a2, 3
-; RV64-NEXT:    addiw a2, a2, 819
-; RV64-NEXT:    vand.vx v24, v24, a2
-; RV64-NEXT:    vand.vx v8, v8, a2
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v24, v8
-; RV64-NEXT:    vsrl.vi v24, v8, 1
-; RV64-NEXT:    lui a3, 5
-; RV64-NEXT:    addiw a3, a3, 1365
-; RV64-NEXT:    vand.vx v24, v24, a3
-; RV64-NEXT:    vand.vx v8, v8, a3
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v24, v8
-; RV64-NEXT:    addi a4, a0, -64
-; RV64-NEXT:    sltu a0, a0, a4
-; RV64-NEXT:    addi a0, a0, -1
-; RV64-NEXT:    and a0, a0, a4
-; RV64-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v24, v16, 8
-; RV64-NEXT:    vsll.vi v16, v16, 8
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vi v24, v16, 4
-; RV64-NEXT:    vand.vx v24, v24, a1
-; RV64-NEXT:    vand.vx v16, v16, a1
-; RV64-NEXT:    vsll.vi v16, v16, 4
-; RV64-NEXT:    vor.vv v16, v24, v16
-; RV64-NEXT:    vsrl.vi v24, v16, 2
-; RV64-NEXT:    vand.vx v24, v24, a2
-; RV64-NEXT:    vand.vx v16, v16, a2
-; RV64-NEXT:    vsll.vi v16, v16, 2
-; RV64-NEXT:    vor.vv v16, v24, v16
-; RV64-NEXT:    vsrl.vi v24, v16, 1
-; RV64-NEXT:    vand.vx v24, v24, a3
-; RV64-NEXT:    vand.vx v16, v16, a3
-; RV64-NEXT:    vadd.vv v16, v16, v16
-; RV64-NEXT:    vor.vv v16, v24, v16
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bitreverse_v128i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 64
+; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:    bltu a0, a2, .LBB35_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    li a1, 64
+; CHECK-NEXT:  .LBB35_2:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v24, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v24
+; CHECK-NEXT:    vsrl.vi v24, v8, 4
+; CHECK-NEXT:    lui a1, 1
+; CHECK-NEXT:    addi a1, a1, -241
+; CHECK-NEXT:    vand.vx v24, v24, a1
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v24, v8
+; CHECK-NEXT:    vsrl.vi v24, v8, 2
+; CHECK-NEXT:    lui a2, 3
+; CHECK-NEXT:    addi a2, a2, 819
+; CHECK-NEXT:    vand.vx v24, v24, a2
+; CHECK-NEXT:    vand.vx v8, v8, a2
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v24, v8
+; CHECK-NEXT:    vsrl.vi v24, v8, 1
+; CHECK-NEXT:    lui a3, 5
+; CHECK-NEXT:    addi a3, a3, 1365
+; CHECK-NEXT:    vand.vx v24, v24, a3
+; CHECK-NEXT:    vand.vx v8, v8, a3
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v24, v8
+; CHECK-NEXT:    addi a4, a0, -64
+; CHECK-NEXT:    sltu a0, a0, a4
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    and a0, a0, a4
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsrl.vi v24, v16, 8
+; CHECK-NEXT:    vsll.vi v16, v16, 8
+; CHECK-NEXT:    vor.vv v16, v16, v24
+; CHECK-NEXT:    vsrl.vi v24, v16, 4
+; CHECK-NEXT:    vand.vx v24, v24, a1
+; CHECK-NEXT:    vand.vx v16, v16, a1
+; CHECK-NEXT:    vsll.vi v16, v16, 4
+; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    vsrl.vi v24, v16, 2
+; CHECK-NEXT:    vand.vx v24, v24, a2
+; CHECK-NEXT:    vand.vx v16, v16, a2
+; CHECK-NEXT:    vsll.vi v16, v16, 2
+; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    vsrl.vi v24, v16, 1
+; CHECK-NEXT:    vand.vx v24, v24, a3
+; CHECK-NEXT:    vand.vx v16, v16, a3
+; CHECK-NEXT:    vadd.vv v16, v16, v16
+; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    ret
   %head = insertelement <128 x i1> poison, i1 true, i32 0
   %m = shufflevector <128 x i1> %head, <128 x i1> poison, <128 x i32> zeroinitializer
   %v = call <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16> %va, <128 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
index 06256d49f12b2ca..74a3153b5839dc6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
@@ -47,21 +47,21 @@ define void @bitreverse_v8i16(ptr %x, ptr %y) {
 ; RV64-NEXT:    vor.vv v8, v8, v9
 ; RV64-NEXT:    vsrl.vi v9, v8, 4
 ; RV64-NEXT:    lui a1, 1
-; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    addi a1, a1, -241
 ; RV64-NEXT:    vand.vx v9, v9, a1
 ; RV64-NEXT:    vand.vx v8, v8, a1
 ; RV64-NEXT:    vsll.vi v8, v8, 4
 ; RV64-NEXT:    vor.vv v8, v9, v8
 ; RV64-NEXT:    vsrl.vi v9, v8, 2
 ; RV64-NEXT:    lui a1, 3
-; RV64-NEXT:    addiw a1, a1, 819
+; RV64-NEXT:    addi a1, a1, 819
 ; RV64-NEXT:    vand.vx v9, v9, a1
 ; RV64-NEXT:    vand.vx v8, v8, a1
 ; RV64-NEXT:    vsll.vi v8, v8, 2
 ; RV64-NEXT:    vor.vv v8, v9, v8
 ; RV64-NEXT:    vsrl.vi v9, v8, 1
 ; RV64-NEXT:    lui a1, 5
-; RV64-NEXT:    addiw a1, a1, 1365
+; RV64-NEXT:    addi a1, a1, 1365
 ; RV64-NEXT:    vand.vx v9, v9, a1
 ; RV64-NEXT:    vand.vx v8, v8, a1
 ; RV64-NEXT:    vadd.vv v8, v8, v8
@@ -130,7 +130,7 @@ define void @bitreverse_v4i32(ptr %x, ptr %y) {
 ; RV64-NEXT:    vle32.v v8, (a0)
 ; RV64-NEXT:    vsrl.vi v9, v8, 8
 ; RV64-NEXT:    lui a1, 16
-; RV64-NEXT:    addiw a1, a1, -256
+; RV64-NEXT:    addi a1, a1, -256
 ; RV64-NEXT:    vand.vx v9, v9, a1
 ; RV64-NEXT:    vsrl.vi v10, v8, 24
 ; RV64-NEXT:    vor.vv v9, v9, v10
@@ -141,21 +141,21 @@ define void @bitreverse_v4i32(ptr %x, ptr %y) {
 ; RV64-NEXT:    vor.vv v8, v8, v9
 ; RV64-NEXT:    vsrl.vi v9, v8, 4
 ; RV64-NEXT:    lui a1, 61681
-; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    addi a1, a1, -241
 ; RV64-NEXT:    vand.vx v9, v9, a1
 ; RV64-NEXT:    vand.vx v8, v8, a1
 ; RV64-NEXT:    vsll.vi v8, v8, 4
 ; RV64-NEXT:    vor.vv v8, v9, v8
 ; RV64-NEXT:    vsrl.vi v9, v8, 2
 ; RV64-NEXT:    lui a1, 209715
-; RV64-NEXT:    addiw a1, a1, 819
+; RV64-NEXT:    addi a1, a1, 819
 ; RV64-NEXT:    vand.vx v9, v9, a1
 ; RV64-NEXT:    vand.vx v8, v8, a1
 ; RV64-NEXT:    vsll.vi v8, v8, 2
 ; RV64-NEXT:    vor.vv v8, v9, v8
 ; RV64-NEXT:    vsrl.vi v9, v8, 1
 ; RV64-NEXT:    lui a1, 349525
-; RV64-NEXT:    addiw a1, a1, 1365
+; RV64-NEXT:    addi a1, a1, 1365
 ; RV64-NEXT:    vand.vx v9, v9, a1
 ; RV64-NEXT:    vand.vx v8, v8, a1
 ; RV64-NEXT:    vadd.vv v8, v8, v8
@@ -368,21 +368,21 @@ define void @bitreverse_v16i16(ptr %x, ptr %y) {
 ; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
 ; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 4
 ; LMULMAX2-RV64-NEXT:    lui a1, 1
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
+; LMULMAX2-RV64-NEXT:    addi a1, a1, -241
 ; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
 ; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
 ; LMULMAX2-RV64-NEXT:    vsll.vi v8, v8, 4
 ; LMULMAX2-RV64-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 2
 ; LMULMAX2-RV64-NEXT:    lui a1, 3
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
+; LMULMAX2-RV64-NEXT:    addi a1, a1, 819
 ; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
 ; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
 ; LMULMAX2-RV64-NEXT:    vsll.vi v8, v8, 2
 ; LMULMAX2-RV64-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 1
 ; LMULMAX2-RV64-NEXT:    lui a1, 5
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
+; LMULMAX2-RV64-NEXT:    addi a1, a1, 1365
 ; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
 ; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
 ; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v8
@@ -453,21 +453,21 @@ define void @bitreverse_v16i16(ptr %x, ptr %y) {
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
 ; LMULMAX1-RV64-NEXT:    lui a2, 1
-; LMULMAX1-RV64-NEXT:    addiw a2, a2, -241
+; LMULMAX1-RV64-NEXT:    addi a2, a2, -241
 ; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
 ; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a2
 ; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 4
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 2
 ; LMULMAX1-RV64-NEXT:    lui a3, 3
-; LMULMAX1-RV64-NEXT:    addiw a3, a3, 819
+; LMULMAX1-RV64-NEXT:    addi a3, a3, 819
 ; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a3
 ; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a3
 ; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 2
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
 ; LMULMAX1-RV64-NEXT:    lui a4, 5
-; LMULMAX1-RV64-NEXT:    addiw a4, a4, 1365
+; LMULMAX1-RV64-NEXT:    addi a4, a4, 1365
 ; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a4
 ; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a4
 ; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v8
@@ -555,7 +555,7 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) {
 ; LMULMAX2-RV64-NEXT:    vle32.v v8, (a0)
 ; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 8
 ; LMULMAX2-RV64-NEXT:    lui a1, 16
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -256
+; LMULMAX2-RV64-NEXT:    addi a1, a1, -256
 ; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
 ; LMULMAX2-RV64-NEXT:    vsrl.vi v12, v8, 24
 ; LMULMAX2-RV64-NEXT:    vor.vv v10, v10, v12
@@ -566,21 +566,21 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) {
 ; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
 ; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 4
 ; LMULMAX2-RV64-NEXT:    lui a1, 61681
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
+; LMULMAX2-RV64-NEXT:    addi a1, a1, -241
 ; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
 ; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
 ; LMULMAX2-RV64-NEXT:    vsll.vi v8, v8, 4
 ; LMULMAX2-RV64-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 2
 ; LMULMAX2-RV64-NEXT:    lui a1, 209715
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
+; LMULMAX2-RV64-NEXT:    addi a1, a1, 819
 ; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
 ; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
 ; LMULMAX2-RV64-NEXT:    vsll.vi v8, v8, 2
 ; LMULMAX2-RV64-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 1
 ; LMULMAX2-RV64-NEXT:    lui a1, 349525
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
+; LMULMAX2-RV64-NEXT:    addi a1, a1, 1365
 ; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
 ; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
 ; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v8
@@ -662,7 +662,7 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) {
 ; LMULMAX1-RV64-NEXT:    vle32.v v9, (a0)
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 8
 ; LMULMAX1-RV64-NEXT:    lui a2, 16
-; LMULMAX1-RV64-NEXT:    addiw a2, a2, -256
+; LMULMAX1-RV64-NEXT:    addi a2, a2, -256
 ; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v11, v8, 24
 ; LMULMAX1-RV64-NEXT:    vor.vv v10, v10, v11
@@ -673,21 +673,21 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) {
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
 ; LMULMAX1-RV64-NEXT:    lui a3, 61681
-; LMULMAX1-RV64-NEXT:    addiw a3, a3, -241
+; LMULMAX1-RV64-NEXT:    addi a3, a3, -241
 ; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a3
 ; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a3
 ; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 4
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 2
 ; LMULMAX1-RV64-NEXT:    lui a4, 209715
-; LMULMAX1-RV64-NEXT:    addiw a4, a4, 819
+; LMULMAX1-RV64-NEXT:    addi a4, a4, 819
 ; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a4
 ; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a4
 ; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 2
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
 ; LMULMAX1-RV64-NEXT:    lui a5, 349525
-; LMULMAX1-RV64-NEXT:    addiw a5, a5, 1365
+; LMULMAX1-RV64-NEXT:    addi a5, a5, 1365
 ; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a5
 ; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a5
 ; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
index 050fa3bdaa7c9a5..22061040ddbc1c2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
@@ -119,73 +119,41 @@ define <16 x i16> @vp_bswap_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
 declare <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32>, <2 x i1>, i32)
 
 define <2 x i32> @vp_bswap_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_v2i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v9, v9, v10, v0.t
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v10, v10, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bswap_v2i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v9, v9, v10, v0.t
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v10, v10, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bswap_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v10, v10, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
   %v = call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
   ret <2 x i32> %v
 }
 
 define <2 x i32> @vp_bswap_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_v2i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsrl.vi v10, v8, 24
-; RV32-NEXT:    vor.vv v9, v9, v10
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsll.vi v10, v10, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bswap_v2i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsrl.vi v10, v8, 24
-; RV64-NEXT:    vor.vv v9, v9, v10
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsll.vi v10, v10, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bswap_v2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsll.vi v10, v10, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    ret
   %head = insertelement <2 x i1> poison, i1 true, i32 0
   %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
   %v = call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
@@ -195,73 +163,41 @@ define <2 x i32> @vp_bswap_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
 declare <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32>, <4 x i1>, i32)
 
 define <4 x i32> @vp_bswap_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_v4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v9, v9, v10, v0.t
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v10, v10, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bswap_v4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v9, v9, v10, v0.t
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v10, v10, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bswap_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v10, v10, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
   %v = call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i32> %v
 }
 
 define <4 x i32> @vp_bswap_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_v4i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsrl.vi v10, v8, 24
-; RV32-NEXT:    vor.vv v9, v9, v10
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsll.vi v10, v10, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bswap_v4i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsrl.vi v10, v8, 24
-; RV64-NEXT:    vor.vv v9, v9, v10
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsll.vi v10, v10, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bswap_v4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsll.vi v10, v10, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    ret
   %head = insertelement <4 x i1> poison, i1 true, i32 0
   %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
   %v = call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
@@ -271,73 +207,41 @@ define <4 x i32> @vp_bswap_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
 declare <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32>, <8 x i1>, i32)
 
 define <8 x i32> @vp_bswap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_v8i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v10, v10, v12, v0.t
-; RV32-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v12, v12, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bswap_v8i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v10, v10, v12, v0.t
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v12, v12, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bswap_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v10, v10, v12, v0.t
+; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v12, v12, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
   %v = call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
   ret <8 x i32> %v
 }
 
 define <8 x i32> @vp_bswap_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_v8i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsrl.vi v12, v8, 24
-; RV32-NEXT:    vor.vv v10, v10, v12
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vsll.vi v12, v12, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bswap_v8i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsrl.vi v12, v8, 24
-; RV64-NEXT:    vor.vv v10, v10, v12
-; RV64-NEXT:    vand.vx v12, v8, a0
-; RV64-NEXT:    vsll.vi v12, v12, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bswap_v8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsrl.vi v12, v8, 24
+; CHECK-NEXT:    vor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    vsll.vi v12, v12, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i32 0
   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
   %v = call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
@@ -347,73 +251,41 @@ define <8 x i32> @vp_bswap_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
 declare <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32>, <16 x i1>, i32)
 
 define <16 x i32> @vp_bswap_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_v16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v12, v12, v16, v0.t
-; RV32-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV32-NEXT:    vsll.vi v16, v16, 8, v0.t
-; RV32-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bswap_v16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v12, v12, v16, v0.t
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsll.vi v16, v16, 8, v0.t
-; RV64-NEXT:    vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bswap_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 8, v0.t
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v16, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v12, v12, v16, v0.t
+; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT:    vsll.vi v16, v16, 8, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
   %v = call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
   ret <16 x i32> %v
 }
 
 define <16 x i32> @vp_bswap_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_v16i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 8
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -256
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vsrl.vi v16, v8, 24
-; RV32-NEXT:    vor.vv v12, v12, v16
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsll.vi v16, v16, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_bswap_v16i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 8
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -256
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsrl.vi v16, v8, 24
-; RV64-NEXT:    vor.vv v12, v12, v16
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsll.vi v16, v16, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_bswap_v16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 8
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -256
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vsrl.vi v16, v8, 24
+; CHECK-NEXT:    vor.vv v12, v12, v16
+; CHECK-NEXT:    vand.vx v16, v8, a0
+; CHECK-NEXT:    vsll.vi v16, v16, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
   %v = call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
index 4d78da2d6476035..628a3e072abcd01 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
@@ -33,41 +33,23 @@ define void @bswap_v8i16(ptr %x, ptr %y) {
 declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
 
 define void @bswap_v4i32(ptr %x, ptr %y) {
-; RV32-LABEL: bswap_v4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    lui a1, 16
-; RV32-NEXT:    addi a1, a1, -256
-; RV32-NEXT:    vand.vx v9, v9, a1
-; RV32-NEXT:    vsrl.vi v10, v8, 24
-; RV32-NEXT:    vor.vv v9, v9, v10
-; RV32-NEXT:    vand.vx v10, v8, a1
-; RV32-NEXT:    vsll.vi v10, v10, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vse32.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: bswap_v4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    lui a1, 16
-; RV64-NEXT:    addiw a1, a1, -256
-; RV64-NEXT:    vand.vx v9, v9, a1
-; RV64-NEXT:    vsrl.vi v10, v8, 24
-; RV64-NEXT:    vor.vv v9, v9, v10
-; RV64-NEXT:    vand.vx v10, v8, a1
-; RV64-NEXT:    vsll.vi v10, v10, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: bswap_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    lui a1, 16
+; CHECK-NEXT:    addi a1, a1, -256
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vand.vx v10, v8, a1
+; CHECK-NEXT:    vsll.vi v10, v10, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
 ;
 ; ZVKB-LABEL: bswap_v4i32:
 ; ZVKB:       # %bb.0:
@@ -269,7 +251,7 @@ define void @bswap_v8i32(ptr %x, ptr %y) {
 ; LMULMAX2-RV64-NEXT:    vle32.v v8, (a0)
 ; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 8
 ; LMULMAX2-RV64-NEXT:    lui a1, 16
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -256
+; LMULMAX2-RV64-NEXT:    addi a1, a1, -256
 ; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
 ; LMULMAX2-RV64-NEXT:    vsrl.vi v12, v8, 24
 ; LMULMAX2-RV64-NEXT:    vor.vv v10, v10, v12
@@ -319,7 +301,7 @@ define void @bswap_v8i32(ptr %x, ptr %y) {
 ; LMULMAX1-RV64-NEXT:    vle32.v v9, (a0)
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 8
 ; LMULMAX1-RV64-NEXT:    lui a2, 16
-; LMULMAX1-RV64-NEXT:    addiw a2, a2, -256
+; LMULMAX1-RV64-NEXT:    addi a2, a2, -256
 ; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v11, v8, 24
 ; LMULMAX1-RV64-NEXT:    vor.vv v10, v10, v11
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
index 9e4584eb17ff9a7..2bbc04172bd1421 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
@@ -236,7 +236,7 @@ define <4 x i32> @add_constant_rhs_with_identity(i32 %a, i32 %b, i32 %c, i32 %d)
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    addiw a1, a1, 25
 ; RV64-NEXT:    addiw a2, a2, 1
-; RV64-NEXT:    addiw a3, a3, 2047
+; RV64-NEXT:    addi a3, a3, 2047
 ; RV64-NEXT:    addiw a3, a3, 308
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vslide1down.vx v8, v8, a0
@@ -273,7 +273,7 @@ define <4 x i32> @add_constant_rhs_identity(i32 %a, i32 %b, i32 %c, i32 %d) {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    addiw a1, a1, 25
 ; RV64-NEXT:    addiw a2, a2, 1
-; RV64-NEXT:    addiw a3, a3, 2047
+; RV64-NEXT:    addi a3, a3, 2047
 ; RV64-NEXT:    addiw a3, a3, 308
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vslide1down.vx v8, v8, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
index e53877f53833fe6..d47971ef5a13ca6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
@@ -247,141 +247,75 @@ define <16 x i8> @vp_ctlz_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
 declare <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)
 
 define <2 x i16> @vp_ctlz_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v2i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_v2i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 false, <2 x i1> %m, i32 %evl)
   ret <2 x i16> %v
 }
 
 define <2 x i16> @vp_ctlz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v2i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_v2i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_v2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <2 x i1> poison, i1 true, i32 0
   %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
   %v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 false, <2 x i1> %m, i32 %evl)
@@ -391,141 +325,75 @@ define <2 x i16> @vp_ctlz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
 declare <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)
 
 define <4 x i16> @vp_ctlz_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v4i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_v4i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 false, <4 x i1> %m, i32 %evl)
   ret <4 x i16> %v
 }
 
 define <4 x i16> @vp_ctlz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v4i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_v4i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_v4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <4 x i1> poison, i1 true, i32 0
   %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
   %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 false, <4 x i1> %m, i32 %evl)
@@ -535,141 +403,75 @@ define <4 x i16> @vp_ctlz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
 declare <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)
 
 define <8 x i16> @vp_ctlz_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v8i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_v8i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 false, <8 x i1> %m, i32 %evl)
   ret <8 x i16> %v
 }
 
 define <8 x i16> @vp_ctlz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v8i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_v8i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_v8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i32 0
   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
   %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 false, <8 x i1> %m, i32 %evl)
@@ -679,141 +481,75 @@ define <8 x i16> @vp_ctlz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
 declare <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)
 
 define <16 x i16> @vp_ctlz_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v16i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_v16i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 false, <16 x i1> %m, i32 %evl)
   ret <16 x i16> %v
 }
 
 define <16 x i16> @vp_ctlz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v16i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_v16i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_v16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
   %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 false, <16 x i1> %m, i32 %evl)
@@ -823,153 +559,81 @@ define <16 x i16> @vp_ctlz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
 declare <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)
 
 define <2 x i32> @vp_ctlz_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v2i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 16, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_v2i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 16, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 16, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
   %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 false, <2 x i1> %m, i32 %evl)
   ret <2 x i32> %v
 }
 
 define <2 x i32> @vp_ctlz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v2i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 16
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_v2i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 16
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_v2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 16
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
   %head = insertelement <2 x i1> poison, i1 true, i32 0
   %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
   %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 false, <2 x i1> %m, i32 %evl)
@@ -979,9 +643,261 @@ define <2 x i32> @vp_ctlz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
 declare <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)
 
 define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v4i32:
+; CHECK-LABEL: vp_ctlz_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 16, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vp_ctlz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_v4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 16
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+declare <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)
+
+define <8 x i32> @vp_ctlz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 16, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vp_ctlz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_v8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 16
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+declare <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)
+
+define <16 x i32> @vp_ctlz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 16, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
+  %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vp_ctlz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_v16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vsrl.vi v12, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vsrl.vi v12, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vsrl.vi v12, v8, 16
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+declare <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
+
+define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
@@ -992,32 +908,48 @@ define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 16, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsrl.vx v9, v8, a1, v0.t
+; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vand.vv v10, v8, v9, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
+; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
+; RV32-NEXT:    li a0, 56
+; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: vp_ctlz_v4i32:
+; RV64-LABEL: vp_ctlz_v2i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
 ; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
@@ -1028,14 +960,21 @@ define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
 ; RV64-NEXT:    vsrl.vi v9, v8, 16, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
+; RV64-NEXT:    li a0, 32
+; RV64-NEXT:    vsrl.vx v9, v8, a0, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
 ; RV64-NEXT:    lui a0, 349525
 ; RV64-NEXT:    addiw a0, a0, 1365
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
 ; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
 ; RV64-NEXT:    lui a0, 209715
 ; RV64-NEXT:    addiw a0, a0, 819
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
 ; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
@@ -1044,20 +983,25 @@ define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; RV64-NEXT:    lui a0, 61681
 ; RV64-NEXT:    addiw a0, a0, -241
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    lui a0, 4112
 ; RV64-NEXT:    addiw a0, a0, 257
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; RV64-NEXT:    li a0, 56
+; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
-  %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl)
-  ret <4 x i32> %v
+  %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
 }
 
-define <4 x i32> @vp_ctlz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v4i32_unmasked:
+define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v2i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vsrl.vi v9, v8, 1
 ; RV32-NEXT:    vor.vv v8, v8, v9
 ; RV32-NEXT:    vsrl.vi v9, v8, 2
@@ -1068,32 +1012,48 @@ define <4 x i32> @vp_ctlz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vor.vv v8, v8, v9
 ; RV32-NEXT:    vsrl.vi v9, v8, 16
 ; RV32-NEXT:    vor.vv v8, v8, v9
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsrl.vx v9, v8, a1
+; RV32-NEXT:    vor.vv v8, v8, v9
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vand.vv v9, v9, v10
 ; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vand.vv v10, v8, v9
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
+; RV32-NEXT:    vand.vv v8, v8, v9
+; RV32-NEXT:    vadd.vv v8, v10, v8
 ; RV32-NEXT:    vsrl.vi v9, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v9
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vmul.vv v8, v8, v9
+; RV32-NEXT:    li a0, 56
+; RV32-NEXT:    vsrl.vx v8, v8, a0
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: vp_ctlz_v4i32_unmasked:
+; RV64-LABEL: vp_ctlz_v2i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV64-NEXT:    vsrl.vi v9, v8, 1
 ; RV64-NEXT:    vor.vv v8, v8, v9
 ; RV64-NEXT:    vsrl.vi v9, v8, 2
@@ -1104,14 +1064,21 @@ define <4 x i32> @vp_ctlz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
 ; RV64-NEXT:    vor.vv v8, v8, v9
 ; RV64-NEXT:    vsrl.vi v9, v8, 16
 ; RV64-NEXT:    vor.vv v8, v8, v9
+; RV64-NEXT:    li a0, 32
+; RV64-NEXT:    vsrl.vx v9, v8, a0
+; RV64-NEXT:    vor.vv v8, v8, v9
 ; RV64-NEXT:    vnot.v v8, v8
 ; RV64-NEXT:    vsrl.vi v9, v8, 1
 ; RV64-NEXT:    lui a0, 349525
 ; RV64-NEXT:    addiw a0, a0, 1365
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vand.vx v9, v9, a0
 ; RV64-NEXT:    vsub.vv v8, v8, v9
 ; RV64-NEXT:    lui a0, 209715
 ; RV64-NEXT:    addiw a0, a0, 819
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vand.vx v9, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
 ; RV64-NEXT:    vand.vx v8, v8, a0
@@ -1120,24 +1087,29 @@ define <4 x i32> @vp_ctlz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
 ; RV64-NEXT:    vadd.vv v8, v8, v9
 ; RV64-NEXT:    lui a0, 61681
 ; RV64-NEXT:    addiw a0, a0, -241
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vand.vx v8, v8, a0
 ; RV64-NEXT:    lui a0, 4112
 ; RV64-NEXT:    addiw a0, a0, 257
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
+; RV64-NEXT:    li a0, 56
+; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
-  %head = insertelement <4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
-  %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl)
-  ret <4 x i32> %v
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
 }
 
-declare <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)
+declare <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
 
-define <8 x i32> @vp_ctlz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v8i32:
+define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 2, v0.t
@@ -1148,32 +1120,48 @@ define <8 x i32> @vp_ctlz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 16, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsrl.vx v10, v8, a1, v0.t
+; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vand.vv v12, v8, v10, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
+; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
+; RV32-NEXT:    li a0, 56
+; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: vp_ctlz_v8i32:
+; RV64-LABEL: vp_ctlz_v4i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    vsrl.vi v10, v8, 2, v0.t
@@ -1184,14 +1172,21 @@ define <8 x i32> @vp_ctlz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    vsrl.vi v10, v8, 16, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
+; RV64-NEXT:    li a0, 32
+; RV64-NEXT:    vsrl.vx v10, v8, a0, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
 ; RV64-NEXT:    lui a0, 349525
 ; RV64-NEXT:    addiw a0, a0, 1365
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
 ; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    lui a0, 209715
 ; RV64-NEXT:    addiw a0, a0, 819
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
 ; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
@@ -1200,20 +1195,25 @@ define <8 x i32> @vp_ctlz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    lui a0, 61681
 ; RV64-NEXT:    addiw a0, a0, -241
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    lui a0, 4112
 ; RV64-NEXT:    addiw a0, a0, 257
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; RV64-NEXT:    li a0, 56
+; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
-  %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl)
-  ret <8 x i32> %v
+  %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
 }
 
-define <8 x i32> @vp_ctlz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v8i32_unmasked:
+define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v4i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vsrl.vi v10, v8, 1
 ; RV32-NEXT:    vor.vv v8, v8, v10
 ; RV32-NEXT:    vsrl.vi v10, v8, 2
@@ -1224,32 +1224,48 @@ define <8 x i32> @vp_ctlz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vor.vv v8, v8, v10
 ; RV32-NEXT:    vsrl.vi v10, v8, 16
 ; RV32-NEXT:    vor.vv v8, v8, v10
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsrl.vx v10, v8, a1
+; RV32-NEXT:    vor.vv v8, v8, v10
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vand.vv v10, v10, v12
 ; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vand.vv v12, v8, v10
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v10, v8
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vadd.vv v8, v12, v8
 ; RV32-NEXT:    vsrl.vi v10, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vmul.vv v8, v8, v10
+; RV32-NEXT:    li a0, 56
+; RV32-NEXT:    vsrl.vx v8, v8, a0
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: vp_ctlz_v8i32_unmasked:
+; RV64-LABEL: vp_ctlz_v4i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV64-NEXT:    vsrl.vi v10, v8, 1
 ; RV64-NEXT:    vor.vv v8, v8, v10
 ; RV64-NEXT:    vsrl.vi v10, v8, 2
@@ -1260,14 +1276,21 @@ define <8 x i32> @vp_ctlz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
 ; RV64-NEXT:    vor.vv v8, v8, v10
 ; RV64-NEXT:    vsrl.vi v10, v8, 16
 ; RV64-NEXT:    vor.vv v8, v8, v10
+; RV64-NEXT:    li a0, 32
+; RV64-NEXT:    vsrl.vx v10, v8, a0
+; RV64-NEXT:    vor.vv v8, v8, v10
 ; RV64-NEXT:    vnot.v v8, v8
 ; RV64-NEXT:    vsrl.vi v10, v8, 1
 ; RV64-NEXT:    lui a0, 349525
 ; RV64-NEXT:    addiw a0, a0, 1365
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vand.vx v10, v10, a0
 ; RV64-NEXT:    vsub.vv v8, v8, v10
 ; RV64-NEXT:    lui a0, 209715
 ; RV64-NEXT:    addiw a0, a0, 819
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vand.vx v10, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
 ; RV64-NEXT:    vand.vx v8, v8, a0
@@ -1276,24 +1299,29 @@ define <8 x i32> @vp_ctlz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
 ; RV64-NEXT:    vadd.vv v8, v8, v10
 ; RV64-NEXT:    lui a0, 61681
 ; RV64-NEXT:    addiw a0, a0, -241
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vand.vx v8, v8, a0
 ; RV64-NEXT:    lui a0, 4112
 ; RV64-NEXT:    addiw a0, a0, 257
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
+; RV64-NEXT:    li a0, 56
+; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
-  %head = insertelement <8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
-  %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl)
-  ret <8 x i32> %v
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
 }
 
-declare <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)
+declare <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
 
-define <16 x i32> @vp_ctlz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v16i32:
+define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    vsrl.vi v12, v8, 2, v0.t
@@ -1304,32 +1332,48 @@ define <16 x i32> @vp_ctlz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl
 ; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    vsrl.vi v12, v8, 16, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsrl.vx v12, v8, a1, v0.t
+; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0, v0.t
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vmul.vv v8, v8, v12, v0.t
+; RV32-NEXT:    li a0, 56
+; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: vp_ctlz_v16i32:
+; RV64-LABEL: vp_ctlz_v8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
 ; RV64-NEXT:    vsrl.vi v12, v8, 2, v0.t
@@ -1340,14 +1384,21 @@ define <16 x i32> @vp_ctlz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl
 ; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
 ; RV64-NEXT:    vsrl.vi v12, v8, 16, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
+; RV64-NEXT:    li a0, 32
+; RV64-NEXT:    vsrl.vx v12, v8, a0, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
 ; RV64-NEXT:    lui a0, 349525
 ; RV64-NEXT:    addiw a0, a0, 1365
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
 ; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
 ; RV64-NEXT:    lui a0, 209715
 ; RV64-NEXT:    addiw a0, a0, 819
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
 ; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
@@ -1356,20 +1407,25 @@ define <16 x i32> @vp_ctlz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl
 ; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
 ; RV64-NEXT:    lui a0, 61681
 ; RV64-NEXT:    addiw a0, a0, -241
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    lui a0, 4112
 ; RV64-NEXT:    addiw a0, a0, 257
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; RV64-NEXT:    li a0, 56
+; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
-  %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl)
-  ret <16 x i32> %v
+  %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
 }
 
-define <16 x i32> @vp_ctlz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v16i32_unmasked:
+define <8 x i64> @vp_ctlz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v8i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vsrl.vi v12, v8, 1
 ; RV32-NEXT:    vor.vv v8, v8, v12
 ; RV32-NEXT:    vsrl.vi v12, v8, 2
@@ -1380,32 +1436,48 @@ define <16 x i32> @vp_ctlz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vor.vv v8, v8, v12
 ; RV32-NEXT:    vsrl.vi v12, v8, 16
 ; RV32-NEXT:    vor.vv v8, v8, v12
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsrl.vx v12, v8, a1
+; RV32-NEXT:    vor.vv v8, v8, v12
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vand.vv v12, v12, v16
 ; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vand.vv v16, v8, v12
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v12, v8
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vadd.vv v8, v16, v8
 ; RV32-NEXT:    vsrl.vi v12, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vmul.vv v8, v8, v12
+; RV32-NEXT:    li a0, 56
+; RV32-NEXT:    vsrl.vx v8, v8, a0
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: vp_ctlz_v16i32_unmasked:
+; RV64-LABEL: vp_ctlz_v8i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV64-NEXT:    vsrl.vi v12, v8, 1
 ; RV64-NEXT:    vor.vv v8, v8, v12
 ; RV64-NEXT:    vsrl.vi v12, v8, 2
@@ -1416,14 +1488,21 @@ define <16 x i32> @vp_ctlz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
 ; RV64-NEXT:    vor.vv v8, v8, v12
 ; RV64-NEXT:    vsrl.vi v12, v8, 16
 ; RV64-NEXT:    vor.vv v8, v8, v12
+; RV64-NEXT:    li a0, 32
+; RV64-NEXT:    vsrl.vx v12, v8, a0
+; RV64-NEXT:    vor.vv v8, v8, v12
 ; RV64-NEXT:    vnot.v v8, v8
 ; RV64-NEXT:    vsrl.vi v12, v8, 1
 ; RV64-NEXT:    lui a0, 349525
 ; RV64-NEXT:    addiw a0, a0, 1365
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vand.vx v12, v12, a0
 ; RV64-NEXT:    vsub.vv v8, v8, v12
 ; RV64-NEXT:    lui a0, 209715
 ; RV64-NEXT:    addiw a0, a0, 819
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vand.vx v12, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
 ; RV64-NEXT:    vand.vx v8, v8, a0
@@ -1432,107 +1511,127 @@ define <16 x i32> @vp_ctlz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
 ; RV64-NEXT:    vadd.vv v8, v8, v12
 ; RV64-NEXT:    lui a0, 61681
 ; RV64-NEXT:    addiw a0, a0, -241
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vand.vx v8, v8, a0
 ; RV64-NEXT:    lui a0, 4112
 ; RV64-NEXT:    addiw a0, a0, 257
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
+; RV64-NEXT:    li a0, 56
+; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
-  %head = insertelement <16 x i1> poison, i1 true, i32 0
-  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
-  %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl)
-  ret <16 x i32> %v
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
 }
 
-declare <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
+declare <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32)
 
-define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v2i64:
+define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v15i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 16, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    li a1, 32
-; RV32-NEXT:    vsrl.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    lui a1, 349525
 ; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
+; RV32-NEXT:    sw a1, 28(sp)
+; RV32-NEXT:    sw a1, 24(sp)
 ; RV32-NEXT:    lui a1, 209715
 ; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 16(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    lui a1, 4112
 ; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
+; RV32-NEXT:    sw a1, 4(sp)
+; RV32-NEXT:    sw a1, 0(sp)
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
+; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
+; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
+; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vnot.v v8, v8, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT:    addi a1, sp, 24
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v24, (a1), zero
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    mv a1, sp
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: vp_ctlz_v2i64:
+; RV64-LABEL: vp_ctlz_v15i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 16, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
+; RV64-NEXT:    vsrl.vx v16, v8, a0, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
 ; RV64-NEXT:    lui a0, 349525
 ; RV64-NEXT:    addiw a0, a0, 1365
 ; RV64-NEXT:    slli a1, a0, 32
 ; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
+; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    lui a0, 209715
 ; RV64-NEXT:    addiw a0, a0, 819
 ; RV64-NEXT:    slli a1, a0, 32
 ; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
 ; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
+; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    lui a0, 61681
 ; RV64-NEXT:    addiw a0, a0, -241
 ; RV64-NEXT:    slli a1, a0, 32
@@ -1546,97 +1645,112 @@ define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
-  %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl)
-  ret <2 x i64> %v
+  %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl)
+  ret <15 x i64> %v
 }
 
-define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v2i64_unmasked:
+define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v15i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 16
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    li a1, 32
-; RV32-NEXT:    vsrl.vx v9, v8, a1
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    lui a1, 349525
 ; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v9, v9, v10
-; RV32-NEXT:    vsub.vv v8, v8, v9
+; RV32-NEXT:    sw a1, 28(sp)
+; RV32-NEXT:    sw a1, 24(sp)
 ; RV32-NEXT:    lui a1, 209715
 ; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 16(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v9
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    lui a1, 4112
 ; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v9
-; RV32-NEXT:    li a0, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_v2i64_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 16
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v9, v8, a0
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
+; RV32-NEXT:    sw a1, 4(sp)
+; RV32-NEXT:    sw a1, 0(sp)
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v16, v8, 2
+; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v16, v8, 8
+; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v16, v8, 16
+; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsrl.vx v16, v8, a1
+; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    addi a1, sp, 24
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v24, (a1), zero
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v16, v16, v24
+; RV32-NEXT:    vsub.vv v8, v8, v16
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v24, v8, v16
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vadd.vv v8, v24, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    mv a1, sp
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vmul.vv v8, v8, v16
+; RV32-NEXT:    li a0, 56
+; RV32-NEXT:    vsrl.vx v8, v8, a0
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vp_ctlz_v15i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vsrl.vi v16, v8, 1
+; RV64-NEXT:    vor.vv v8, v8, v16
+; RV64-NEXT:    vsrl.vi v16, v8, 2
+; RV64-NEXT:    vor.vv v8, v8, v16
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vor.vv v8, v8, v16
+; RV64-NEXT:    vsrl.vi v16, v8, 8
+; RV64-NEXT:    vor.vv v8, v8, v16
+; RV64-NEXT:    vsrl.vi v16, v8, 16
+; RV64-NEXT:    vor.vv v8, v8, v16
+; RV64-NEXT:    li a0, 32
+; RV64-NEXT:    vsrl.vx v16, v8, a0
+; RV64-NEXT:    vor.vv v8, v8, v16
+; RV64-NEXT:    vnot.v v8, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 1
+; RV64-NEXT:    lui a0, 349525
 ; RV64-NEXT:    addiw a0, a0, 1365
 ; RV64-NEXT:    slli a1, a0, 32
 ; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
+; RV64-NEXT:    vand.vx v16, v16, a0
+; RV64-NEXT:    vsub.vv v8, v8, v16
 ; RV64-NEXT:    lui a0, 209715
 ; RV64-NEXT:    addiw a0, a0, 819
 ; RV64-NEXT:    slli a1, a0, 32
 ; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v8, a0
+; RV64-NEXT:    vand.vx v16, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
 ; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
+; RV64-NEXT:    vadd.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v16
 ; RV64-NEXT:    lui a0, 61681
 ; RV64-NEXT:    addiw a0, a0, -241
 ; RV64-NEXT:    slli a1, a0, 32
@@ -1650,101 +1764,116 @@ define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
-  %head = insertelement <2 x i1> poison, i1 true, i32 0
-  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
-  %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl)
-  ret <2 x i64> %v
+  %head = insertelement <15 x i1> poison, i1 true, i32 0
+  %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
+  %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl)
+  ret <15 x i64> %v
 }
 
-declare <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
+declare <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)
 
-define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v4i64:
+define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v16i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 16, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    li a1, 32
-; RV32-NEXT:    vsrl.vx v10, v8, a1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    lui a1, 349525
 ; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
+; RV32-NEXT:    sw a1, 28(sp)
+; RV32-NEXT:    sw a1, 24(sp)
 ; RV32-NEXT:    lui a1, 209715
 ; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 16(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    lui a1, 4112
 ; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
+; RV32-NEXT:    sw a1, 4(sp)
+; RV32-NEXT:    sw a1, 0(sp)
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
+; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
+; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
+; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vnot.v v8, v8, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT:    addi a1, sp, 24
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v24, (a1), zero
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    mv a1, sp
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: vp_ctlz_v4i64:
+; RV64-LABEL: vp_ctlz_v16i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 16, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
+; RV64-NEXT:    vsrl.vx v16, v8, a0, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
 ; RV64-NEXT:    lui a0, 349525
 ; RV64-NEXT:    addiw a0, a0, 1365
 ; RV64-NEXT:    slli a1, a0, 32
 ; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
+; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    lui a0, 209715
 ; RV64-NEXT:    addiw a0, a0, 819
 ; RV64-NEXT:    slli a1, a0, 32
 ; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
 ; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
+; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    lui a0, 61681
 ; RV64-NEXT:    addiw a0, a0, -241
 ; RV64-NEXT:    slli a1, a0, 32
@@ -1758,97 +1887,112 @@ define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
-  %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl)
-  ret <4 x i64> %v
+  %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
 }
 
-define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v4i64_unmasked:
+define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v16i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 16
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    li a1, 32
-; RV32-NEXT:    vsrl.vx v10, v8, a1
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 1
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    lui a1, 349525
 ; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v10, v10, v12
-; RV32-NEXT:    vsub.vv v8, v8, v10
+; RV32-NEXT:    sw a1, 28(sp)
+; RV32-NEXT:    sw a1, 24(sp)
 ; RV32-NEXT:    lui a1, 209715
 ; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vadd.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 16(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    lui a1, 4112
 ; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v10
+; RV32-NEXT:    sw a1, 4(sp)
+; RV32-NEXT:    sw a1, 0(sp)
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v16, v8, 2
+; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v16, v8, 8
+; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v16, v8, 16
+; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsrl.vx v16, v8, a1
+; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    addi a1, sp, 24
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v24, (a1), zero
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v16, v16, v24
+; RV32-NEXT:    vsub.vv v8, v8, v16
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v24, v8, v16
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vadd.vv v8, v24, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    mv a1, sp
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
+; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: vp_ctlz_v4i64_unmasked:
+; RV64-LABEL: vp_ctlz_v16i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 16
-; RV64-NEXT:    vor.vv v8, v8, v10
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vsrl.vi v16, v8, 1
+; RV64-NEXT:    vor.vv v8, v8, v16
+; RV64-NEXT:    vsrl.vi v16, v8, 2
+; RV64-NEXT:    vor.vv v8, v8, v16
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vor.vv v8, v8, v16
+; RV64-NEXT:    vsrl.vi v16, v8, 8
+; RV64-NEXT:    vor.vv v8, v8, v16
+; RV64-NEXT:    vsrl.vi v16, v8, 16
+; RV64-NEXT:    vor.vv v8, v8, v16
 ; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v10, v8, a0
-; RV64-NEXT:    vor.vv v8, v8, v10
+; RV64-NEXT:    vsrl.vx v16, v8, a0
+; RV64-NEXT:    vor.vv v8, v8, v16
 ; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 1
+; RV64-NEXT:    vsrl.vi v16, v8, 1
 ; RV64-NEXT:    lui a0, 349525
 ; RV64-NEXT:    addiw a0, a0, 1365
 ; RV64-NEXT:    slli a1, a0, 32
 ; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
+; RV64-NEXT:    vand.vx v16, v16, a0
+; RV64-NEXT:    vsub.vv v8, v8, v16
 ; RV64-NEXT:    lui a0, 209715
 ; RV64-NEXT:    addiw a0, a0, 819
 ; RV64-NEXT:    slli a1, a0, 32
 ; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v8, a0
+; RV64-NEXT:    vand.vx v16, v8, a0
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
 ; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
+; RV64-NEXT:    vadd.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v16
 ; RV64-NEXT:    lui a0, 61681
 ; RV64-NEXT:    addiw a0, a0, -241
 ; RV64-NEXT:    slli a1, a0, 32
@@ -1862,248 +2006,54 @@ define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
-  %head = insertelement <4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
-  %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl)
-  ret <4 x i64> %v
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
 }
 
-declare <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
+declare <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64>, i1 immarg, <32 x i1>, i32)
 
-define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v8i64:
+define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v32i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 16, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    li a1, 32
-; RV32-NEXT:    vsrl.vx v12, v8, a1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; RV32-NEXT:    addi sp, sp, -48
+; RV32-NEXT:    .cfi_def_cfa_offset 48
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a2, 56
+; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 48
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT:    vslidedown.vi v24, v0, 2
 ; RV32-NEXT:    lui a1, 349525
 ; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
+; RV32-NEXT:    sw a1, 44(sp)
+; RV32-NEXT:    sw a1, 40(sp)
 ; RV32-NEXT:    lui a1, 209715
 ; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
+; RV32-NEXT:    sw a1, 36(sp)
+; RV32-NEXT:    sw a1, 32(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV32-NEXT:    sw a1, 28(sp)
+; RV32-NEXT:    sw a1, 24(sp)
 ; RV32-NEXT:    lui a1, 4112
 ; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v12, v0.t
-; RV32-NEXT:    li a0, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_v8i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 16, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT:    ret
-  %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl)
-  ret <8 x i64> %v
-}
-
-define <8 x i64> @vp_ctlz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v8i64_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 16
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    li a1, 32
-; RV32-NEXT:    vsrl.vx v12, v8, a1
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v12, v12, v16
-; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v12
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v12
-; RV32-NEXT:    li a0, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_v8i64_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 16
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v12, v8, a0
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsub.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a0
-; RV64-NEXT:    ret
-  %head = insertelement <8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
-  %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl)
-  ret <8 x i64> %v
-}
-
-declare <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32)
-
-define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v15i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
 ; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    li a3, 16
 ; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    mv a2, a0
+; RV32-NEXT:    bltu a0, a3, .LBB34_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:  .LBB34_2:
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
@@ -2118,236 +2068,128 @@ define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl
 ; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    li a4, 40
+; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    li a4, 48
+; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a3, sp, 40
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
+; RV32-NEXT:    vlse64.v v8, (a3), zero
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    li a4, 24
+; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    li a4, 48
+; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    li a4, 40
+; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    li a4, 40
+; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a3, sp, 32
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT:    vlse64.v v8, (a3), zero
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    li a4, 48
+; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    li a4, 40
+; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    li a4, 40
+; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v16, v16, 2, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
 ; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v16, v8, v16, v0.t
+; RV32-NEXT:    addi a3, sp, 24
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    mv a1, sp
+; RV32-NEXT:    vlse64.v v8, (a3), zero
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    li a4, 40
+; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV32-NEXT:    addi a3, sp, 16
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a3), zero
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 3
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT:    li a0, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_v15i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT:    ret
-  %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl)
-  ret <15 x i64> %v
-}
-
-define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v15i64_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 16
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    li a1, 32
-; RV32-NEXT:    vsrl.vx v16, v8, a1
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v16
-; RV32-NEXT:    li a0, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_v15i64_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 16
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v16, v8, a0
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a0
-; RV64-NEXT:    ret
-  %head = insertelement <15 x i1> poison, i1 true, i32 0
-  %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
-  %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl)
-  ret <15 x i64> %v
-}
-
-declare <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)
-
-define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v16i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
+; RV32-NEXT:    li a2, 56
+; RV32-NEXT:    vsrl.vx v8, v8, a2, v0.t
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a3, a0, -16
+; RV32-NEXT:    sltu a0, a0, a3
+; RV32-NEXT:    addi a0, a0, -1
+; RV32-NEXT:    and a0, a0, a3
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vmv1r.v v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
+; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
@@ -2356,45 +2198,108 @@ define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl
 ; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    li a1, 32
 ; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
+; RV32-NEXT:    addi a0, sp, 48
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    addi a0, sp, 48
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 48
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 48
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 40
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT:    li a0, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    vsrl.vx v16, v8, a2, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 56
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 48
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: vp_ctlz_v16i64:
+; RV64-LABEL: vp_ctlz_v32i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    vslidedown.vi v24, v0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    bltu a0, a2, .LBB34_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB34_2:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
@@ -2405,66 +2310,121 @@ define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v16, v8, a0, v0.t
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v16, v8, a1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
+; RV64-NEXT:    lui a2, 349525
+; RV64-NEXT:    addiw a2, a2, 1365
+; RV64-NEXT:    slli a3, a2, 32
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
 ; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
+; RV64-NEXT:    lui a3, 209715
+; RV64-NEXT:    addiw a3, a3, 819
+; RV64-NEXT:    slli a4, a3, 32
+; RV64-NEXT:    add a3, a3, a4
+; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV64-NEXT:    lui a4, 61681
+; RV64-NEXT:    addiw a4, a4, -241
+; RV64-NEXT:    slli a5, a4, 32
+; RV64-NEXT:    add a4, a4, a5
+; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
+; RV64-NEXT:    lui a5, 4112
+; RV64-NEXT:    addiw a5, a5, 257
+; RV64-NEXT:    slli a6, a5, 32
+; RV64-NEXT:    add a5, a5, a6
+; RV64-NEXT:    vmul.vx v8, v8, a5, v0.t
+; RV64-NEXT:    li a6, 56
+; RV64-NEXT:    vsrl.vx v8, v8, a6, v0.t
+; RV64-NEXT:    addi a7, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a7) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a7, a0, -16
+; RV64-NEXT:    sltu a0, a0, a7
+; RV64-NEXT:    addi a0, a0, -1
+; RV64-NEXT:    and a0, a0, a7
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vor.vv v16, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vi v8, v16, 2, v0.t
+; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vx v16, v8, a1, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vnot.v v8, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
+; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
+; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
+; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
+; RV64-NEXT:    vmul.vx v8, v8, a5, v0.t
+; RV64-NEXT:    vsrl.vx v16, v8, a6, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
-  %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl)
-  ret <16 x i64> %v
+  %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl)
+  ret <32 x i64> %v
 }
 
-define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v16i64_unmasked:
+define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v32i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
+; RV32-NEXT:    addi sp, sp, -48
+; RV32-NEXT:    .cfi_def_cfa_offset 48
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb
+; RV32-NEXT:    vmv8r.v v24, v16
 ; RV32-NEXT:    lui a1, 349525
 ; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
+; RV32-NEXT:    sw a1, 44(sp)
+; RV32-NEXT:    sw a1, 40(sp)
 ; RV32-NEXT:    lui a1, 209715
 ; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    sw a1, 36(sp)
+; RV32-NEXT:    sw a1, 32(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 28(sp)
+; RV32-NEXT:    sw a1, 24(sp)
 ; RV32-NEXT:    lui a1, 4112
 ; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    bltu a0, a2, .LBB35_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB35_2:
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vsrl.vi v16, v8, 1
 ; RV32-NEXT:    vor.vv v8, v8, v16
 ; RV32-NEXT:    vsrl.vi v16, v8, 2
@@ -2475,708 +2435,196 @@ define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vor.vv v8, v8, v16
 ; RV32-NEXT:    vsrl.vi v16, v8, 16
 ; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    li a1, 32
-; RV32-NEXT:    vsrl.vx v16, v8, a1
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    vsrl.vx v16, v8, a2
 ; RV32-NEXT:    vor.vv v8, v8, v16
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
+; RV32-NEXT:    addi a3, sp, 40
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
+; RV32-NEXT:    vlse64.v v0, (a3), zero
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    li a4, 24
+; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    vs8r.v v0, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v16, v16, v0
 ; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    addi a3, sp, 32
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16
+; RV32-NEXT:    vlse64.v v0, (a3), zero
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v16, v8, v0
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vadd.vv v8, v24, v8
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    vadd.vv v8, v16, v8
 ; RV32-NEXT:    vsrl.vi v16, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    addi a3, sp, 24
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a3), zero
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    mv a1, sp
+; RV32-NEXT:    addi a3, sp, 16
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a3), zero
+; RV32-NEXT:    addi a3, sp, 48
+; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v16
-; RV32-NEXT:    li a0, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_v16i64_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 16
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v16, v8, a0
-; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a0
-; RV64-NEXT:    ret
-  %head = insertelement <16 x i1> poison, i1 true, i32 0
-  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
-  %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl)
-  ret <16 x i64> %v
-}
-
-declare <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64>, i1 immarg, <32 x i1>, i32)
-
-define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v32i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -48
-; RV32-NEXT:    .cfi_def_cfa_offset 48
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 56
-; RV32-NEXT:    mul a1, a1, a2
-; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 5
-; RV32-NEXT:    add a1, sp, a1
-; RV32-NEXT:    addi a1, a1, 48
-; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; RV32-NEXT:    vslidedown.vi v24, v0, 2
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 44(sp)
-; RV32-NEXT:    sw a1, 40(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 36(sp)
-; RV32-NEXT:    sw a1, 32(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    li a3, 16
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    mv a2, a0
-; RV32-NEXT:    bltu a0, a3, .LBB34_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    li a2, 16
-; RV32-NEXT:  .LBB34_2:
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    li a1, 32
-; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 48
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    addi a3, sp, 40
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 48
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    addi a3, sp, 32
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 48
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vsrl.vi v16, v16, 2, v0.t
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v16, v8, v16, v0.t
-; RV32-NEXT:    addi a3, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
-; RV32-NEXT:    addi a3, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a3), zero
+; RV32-NEXT:    li a1, 56
+; RV32-NEXT:    vsrl.vx v8, v8, a1
 ; RV32-NEXT:    csrr a3, vlenb
 ; RV32-NEXT:    slli a3, a3, 3
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT:    li a2, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a2, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
 ; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a3, a0, -16
 ; RV32-NEXT:    sltu a0, a0, a3
 ; RV32-NEXT:    addi a0, a0, -1
 ; RV32-NEXT:    and a0, a0, a3
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vmv1r.v v0, v24
+; RV32-NEXT:    vsrl.vi v8, v24, 1
+; RV32-NEXT:    vor.vv v8, v24, v8
+; RV32-NEXT:    vsrl.vi v24, v8, 2
+; RV32-NEXT:    vor.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v24, v8, 4
+; RV32-NEXT:    vor.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v24, v8, 8
+; RV32-NEXT:    vor.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v24, v8, 16
+; RV32-NEXT:    vor.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vx v24, v8, a2
+; RV32-NEXT:    vor.vv v8, v8, v24
+; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vsrl.vi v24, v8, 1
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a0, a0, a2
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 48
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    addi a0, sp, 48
-; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vand.vv v24, v24, v16
+; RV32-NEXT:    vsub.vv v8, v8, v24
+; RV32-NEXT:    vand.vv v24, v8, v0
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    vadd.vv v8, v24, v8
+; RV32-NEXT:    vsrl.vi v24, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v24
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 24
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 48
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v16
 ; RV32-NEXT:    addi a0, sp, 48
-; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 48
-; RV32-NEXT:    mul a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 48
-; RV32-NEXT:    mul a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 40
-; RV32-NEXT:    mul a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vmul.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vx v16, v8, a1
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 3
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vx v16, v8, a2, v0.t
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 56
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add sp, sp, a0
 ; RV32-NEXT:    addi sp, sp, 48
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: vp_ctlz_v32i64:
+; RV64-LABEL: vp_ctlz_v32i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    .cfi_def_cfa_offset 16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
-; RV64-NEXT:    sub sp, sp, a1
-; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; RV64-NEXT:    li a2, 16
-; RV64-NEXT:    vslidedown.vi v24, v0, 2
 ; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    bltu a0, a2, .LBB34_2
+; RV64-NEXT:    bltu a0, a2, .LBB35_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    li a1, 16
-; RV64-NEXT:  .LBB34_2:
+; RV64-NEXT:  .LBB35_2:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vi v24, v8, 1
+; RV64-NEXT:    vor.vv v8, v8, v24
+; RV64-NEXT:    vsrl.vi v24, v8, 2
+; RV64-NEXT:    vor.vv v8, v8, v24
+; RV64-NEXT:    vsrl.vi v24, v8, 4
+; RV64-NEXT:    vor.vv v8, v8, v24
+; RV64-NEXT:    vsrl.vi v24, v8, 8
+; RV64-NEXT:    vor.vv v8, v8, v24
+; RV64-NEXT:    vsrl.vi v24, v8, 16
+; RV64-NEXT:    vor.vv v8, v8, v24
 ; RV64-NEXT:    li a1, 32
-; RV64-NEXT:    vsrl.vx v16, v8, a1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vsrl.vx v24, v8, a1
+; RV64-NEXT:    vor.vv v8, v8, v24
+; RV64-NEXT:    vnot.v v8, v8
+; RV64-NEXT:    vsrl.vi v24, v8, 1
 ; RV64-NEXT:    lui a2, 349525
 ; RV64-NEXT:    addiw a2, a2, 1365
 ; RV64-NEXT:    slli a3, a2, 32
 ; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vand.vx v24, v24, a2
+; RV64-NEXT:    vsub.vv v8, v8, v24
 ; RV64-NEXT:    lui a3, 209715
 ; RV64-NEXT:    addiw a3, a3, 819
 ; RV64-NEXT:    slli a4, a3, 32
 ; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
-; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vand.vx v24, v8, a3
+; RV64-NEXT:    vsrl.vi v8, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a3
+; RV64-NEXT:    vadd.vv v8, v24, v8
+; RV64-NEXT:    vsrl.vi v24, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v24
 ; RV64-NEXT:    lui a4, 61681
 ; RV64-NEXT:    addiw a4, a4, -241
 ; RV64-NEXT:    slli a5, a4, 32
 ; RV64-NEXT:    add a4, a4, a5
-; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a4
 ; RV64-NEXT:    lui a5, 4112
 ; RV64-NEXT:    addiw a5, a5, 257
 ; RV64-NEXT:    slli a6, a5, 32
 ; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    vmul.vx v8, v8, a5, v0.t
+; RV64-NEXT:    vmul.vx v8, v8, a5
 ; RV64-NEXT:    li a6, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a6, v0.t
-; RV64-NEXT:    addi a7, sp, 16
-; RV64-NEXT:    vs8r.v v8, (a7) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vx v8, v8, a6
 ; RV64-NEXT:    addi a7, a0, -16
 ; RV64-NEXT:    sltu a0, a0, a7
 ; RV64-NEXT:    addi a0, a0, -1
 ; RV64-NEXT:    and a0, a0, a7
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vmv1r.v v0, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 16
-; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v16, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v8, v16, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vx v16, v8, a1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
-; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
-; RV64-NEXT:    vmul.vx v8, v8, a5, v0.t
-; RV64-NEXT:    vsrl.vx v16, v8, a6, v0.t
-; RV64-NEXT:    addi a0, sp, 16
-; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    vsrl.vi v24, v16, 1
+; RV64-NEXT:    vor.vv v16, v16, v24
+; RV64-NEXT:    vsrl.vi v24, v16, 2
+; RV64-NEXT:    vor.vv v16, v16, v24
+; RV64-NEXT:    vsrl.vi v24, v16, 4
+; RV64-NEXT:    vor.vv v16, v16, v24
+; RV64-NEXT:    vsrl.vi v24, v16, 8
+; RV64-NEXT:    vor.vv v16, v16, v24
+; RV64-NEXT:    vsrl.vi v24, v16, 16
+; RV64-NEXT:    vor.vv v16, v16, v24
+; RV64-NEXT:    vsrl.vx v24, v16, a1
+; RV64-NEXT:    vor.vv v16, v16, v24
+; RV64-NEXT:    vnot.v v16, v16
+; RV64-NEXT:    vsrl.vi v24, v16, 1
+; RV64-NEXT:    vand.vx v24, v24, a2
+; RV64-NEXT:    vsub.vv v16, v16, v24
+; RV64-NEXT:    vand.vx v24, v16, a3
+; RV64-NEXT:    vsrl.vi v16, v16, 2
+; RV64-NEXT:    vand.vx v16, v16, a3
+; RV64-NEXT:    vadd.vv v16, v24, v16
+; RV64-NEXT:    vsrl.vi v24, v16, 4
+; RV64-NEXT:    vadd.vv v16, v16, v24
+; RV64-NEXT:    vand.vx v16, v16, a4
+; RV64-NEXT:    vmul.vx v16, v16, a5
+; RV64-NEXT:    vsrl.vx v16, v16, a6
 ; RV64-NEXT:    ret
-  %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl)
-  ret <32 x i64> %v
-}
-
-define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v32i64_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -48
-; RV32-NEXT:    .cfi_def_cfa_offset 48
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 5
-; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb
-; RV32-NEXT:    vmv8r.v v24, v16
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 44(sp)
-; RV32-NEXT:    sw a1, 40(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 36(sp)
-; RV32-NEXT:    sw a1, 32(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    li a2, 16
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    bltu a0, a2, .LBB35_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    li a1, 16
-; RV32-NEXT:  .LBB35_2:
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 16
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    li a2, 32
-; RV32-NEXT:    vsrl.vx v16, v8, a2
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a3, sp, 40
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v0, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v0, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    addi a3, sp, 32
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v0, (a3), zero
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v0
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    addi a3, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    addi a3, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    addi a3, sp, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v16
-; RV32-NEXT:    li a1, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a1
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 3
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    addi a3, a0, -16
-; RV32-NEXT:    sltu a0, a0, a3
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    and a0, a0, a3
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v8, v24, 1
-; RV32-NEXT:    vor.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vsrl.vi v24, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vsrl.vi v24, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vsrl.vi v24, v8, 16
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vsrl.vx v24, v8, a2
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a2, 24
-; RV32-NEXT:    mul a0, a0, a2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v24, v24, v16
-; RV32-NEXT:    vsub.vv v8, v8, v24
-; RV32-NEXT:    vand.vv v24, v8, v0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v0
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    addi a0, sp, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vmul.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vx v16, v8, a1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 48
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_v32i64_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a2, 16
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    bltu a0, a2, .LBB35_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    li a1, 16
-; RV64-NEXT:  .LBB35_2:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v24, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v24
-; RV64-NEXT:    vsrl.vi v24, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v24
-; RV64-NEXT:    vsrl.vi v24, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v24
-; RV64-NEXT:    vsrl.vi v24, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v24
-; RV64-NEXT:    vsrl.vi v24, v8, 16
-; RV64-NEXT:    vor.vv v8, v8, v24
-; RV64-NEXT:    li a1, 32
-; RV64-NEXT:    vsrl.vx v24, v8, a1
-; RV64-NEXT:    vor.vv v8, v8, v24
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v24, v8, 1
-; RV64-NEXT:    lui a2, 349525
-; RV64-NEXT:    addiw a2, a2, 1365
-; RV64-NEXT:    slli a3, a2, 32
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    vand.vx v24, v24, a2
-; RV64-NEXT:    vsub.vv v8, v8, v24
-; RV64-NEXT:    lui a3, 209715
-; RV64-NEXT:    addiw a3, a3, 819
-; RV64-NEXT:    slli a4, a3, 32
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    vand.vx v24, v8, a3
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a3
-; RV64-NEXT:    vadd.vv v8, v24, v8
-; RV64-NEXT:    vsrl.vi v24, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v24
-; RV64-NEXT:    lui a4, 61681
-; RV64-NEXT:    addiw a4, a4, -241
-; RV64-NEXT:    slli a5, a4, 32
-; RV64-NEXT:    add a4, a4, a5
-; RV64-NEXT:    vand.vx v8, v8, a4
-; RV64-NEXT:    lui a5, 4112
-; RV64-NEXT:    addiw a5, a5, 257
-; RV64-NEXT:    slli a6, a5, 32
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    vmul.vx v8, v8, a5
-; RV64-NEXT:    li a6, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a6
-; RV64-NEXT:    addi a7, a0, -16
-; RV64-NEXT:    sltu a0, a0, a7
-; RV64-NEXT:    addi a0, a0, -1
-; RV64-NEXT:    and a0, a0, a7
-; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v24, v16, 1
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vi v24, v16, 2
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vi v24, v16, 4
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vi v24, v16, 8
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vi v24, v16, 16
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vx v24, v16, a1
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vnot.v v16, v16
-; RV64-NEXT:    vsrl.vi v24, v16, 1
-; RV64-NEXT:    vand.vx v24, v24, a2
-; RV64-NEXT:    vsub.vv v16, v16, v24
-; RV64-NEXT:    vand.vx v24, v16, a3
-; RV64-NEXT:    vsrl.vi v16, v16, 2
-; RV64-NEXT:    vand.vx v16, v16, a3
-; RV64-NEXT:    vadd.vv v16, v24, v16
-; RV64-NEXT:    vsrl.vi v24, v16, 4
-; RV64-NEXT:    vadd.vv v16, v16, v24
-; RV64-NEXT:    vand.vx v16, v16, a4
-; RV64-NEXT:    vmul.vx v16, v16, a5
-; RV64-NEXT:    vsrl.vx v16, v16, a6
-; RV64-NEXT:    ret
-  %head = insertelement <32 x i1> poison, i1 true, i32 0
-  %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
+  %head = insertelement <32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
   %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl)
   ret <32 x i64> %v
 }
@@ -3414,141 +2862,75 @@ define <16 x i8> @vp_ctlz_zero_undef_v16i8_unmasked(<16 x i8> %va, i32 zeroext %
 }
 
 define <2 x i16> @vp_ctlz_zero_undef_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v2i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v2i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 true, <2 x i1> %m, i32 %evl)
   ret <2 x i16> %v
 }
 
 define <2 x i16> @vp_ctlz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v2i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v2i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <2 x i1> poison, i1 true, i32 0
   %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
   %v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 true, <2 x i1> %m, i32 %evl)
@@ -3556,141 +2938,75 @@ define <2 x i16> @vp_ctlz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext %
 }
 
 define <4 x i16> @vp_ctlz_zero_undef_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v4i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v4i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 true, <4 x i1> %m, i32 %evl)
   ret <4 x i16> %v
 }
 
 define <4 x i16> @vp_ctlz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v4i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v4i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <4 x i1> poison, i1 true, i32 0
   %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
   %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 true, <4 x i1> %m, i32 %evl)
@@ -3698,141 +3014,75 @@ define <4 x i16> @vp_ctlz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext %
 }
 
 define <8 x i16> @vp_ctlz_zero_undef_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v8i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v8i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 true, <8 x i1> %m, i32 %evl)
   ret <8 x i16> %v
 }
 
 define <8 x i16> @vp_ctlz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v8i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v8i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i32 0
   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
   %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 true, <8 x i1> %m, i32 %evl)
@@ -3840,141 +3090,75 @@ define <8 x i16> @vp_ctlz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %
 }
 
 define <16 x i16> @vp_ctlz_zero_undef_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v16i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v16i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 true, <16 x i1> %m, i32 %evl)
   ret <16 x i16> %v
 }
 
 define <16 x i16> @vp_ctlz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v16i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v16i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
   %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 true, <16 x i1> %m, i32 %evl)
@@ -3982,153 +3166,81 @@ define <16 x i16> @vp_ctlz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroex
 }
 
 define <2 x i32> @vp_ctlz_zero_undef_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v2i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 16, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v2i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 16, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 16, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
   %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl)
   ret <2 x i32> %v
 }
 
 define <2 x i32> @vp_ctlz_zero_undef_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v2i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 16
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v2i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 16
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 16
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
   %head = insertelement <2 x i1> poison, i1 true, i32 0
   %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
   %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl)
@@ -4136,153 +3248,81 @@ define <2 x i32> @vp_ctlz_zero_undef_v2i32_unmasked(<2 x i32> %va, i32 zeroext %
 }
 
 define <4 x i32> @vp_ctlz_zero_undef_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 16, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 16, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 16, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
   %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 true, <4 x i1> %m, i32 %evl)
   ret <4 x i32> %v
-}
-
-define <4 x i32> @vp_ctlz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v4i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 16
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v4i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 16
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+}
+
+define <4 x i32> @vp_ctlz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_zero_undef_v4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 16
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
   %head = insertelement <4 x i1> poison, i1 true, i32 0
   %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
   %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 true, <4 x i1> %m, i32 %evl)
@@ -4290,153 +3330,81 @@ define <4 x i32> @vp_ctlz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %
 }
 
 define <8 x i32> @vp_ctlz_zero_undef_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v8i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 16, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v8i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 16, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 16, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
   %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 true, <8 x i1> %m, i32 %evl)
   ret <8 x i32> %v
 }
 
 define <8 x i32> @vp_ctlz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v8i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 16
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v8i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 16
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 16
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i32 0
   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
   %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 true, <8 x i1> %m, i32 %evl)
@@ -4444,153 +3412,81 @@ define <8 x i32> @vp_ctlz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext %
 }
 
 define <16 x i32> @vp_ctlz_zero_undef_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 16, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 2, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 16, v0.t
-; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 2, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 8, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 16, v0.t
+; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
   %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 true, <16 x i1> %m, i32 %evl)
   ret <16 x i32> %v
 }
 
 define <16 x i32> @vp_ctlz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v16i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 16
-; RV32-NEXT:    vor.vv v8, v8, v12
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v16i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 2
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 16
-; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsub.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vsrl.vi v12, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vsrl.vi v12, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vsrl.vi v12, v8, 16
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
   %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 true, <16 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
index 9ad8beac56c4b75..9c30d3ac71679ba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -123,11 +123,11 @@ define void @ctlz_v8i16(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 5
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
 ; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
 ; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    lui a1, 3
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
 ; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
@@ -135,7 +135,7 @@ define void @ctlz_v8i16(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
 ; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    lui a1, 1
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    li a1, 257
 ; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
@@ -143,75 +143,40 @@ define void @ctlz_v8i16(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vse16.v v8, (a0)
 ; LMULMAX2-RV64I-NEXT:    ret
 ;
-; LMULMAX1-RV32-LABEL: ctlz_v8i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vnot.v v8, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT:    lui a1, 5
-; LMULMAX1-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    lui a1, 3
-; LMULMAX1-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    lui a1, 1
-; LMULMAX1-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    li a1, 257
-; LMULMAX1-RV32-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: ctlz_v8i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vnot.v v8, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT:    lui a1, 5
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    lui a1, 3
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, 819
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    lui a1, 1
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    li a1, 257
-; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; LMULMAX1-LABEL: ctlz_v8i16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX1-NEXT:    vle16.v v8, (a0)
+; LMULMAX1-NEXT:    vsrl.vi v9, v8, 1
+; LMULMAX1-NEXT:    vor.vv v8, v8, v9
+; LMULMAX1-NEXT:    vsrl.vi v9, v8, 2
+; LMULMAX1-NEXT:    vor.vv v8, v8, v9
+; LMULMAX1-NEXT:    vsrl.vi v9, v8, 4
+; LMULMAX1-NEXT:    vor.vv v8, v8, v9
+; LMULMAX1-NEXT:    vsrl.vi v9, v8, 8
+; LMULMAX1-NEXT:    vor.vv v8, v8, v9
+; LMULMAX1-NEXT:    vnot.v v8, v8
+; LMULMAX1-NEXT:    vsrl.vi v9, v8, 1
+; LMULMAX1-NEXT:    lui a1, 5
+; LMULMAX1-NEXT:    addi a1, a1, 1365
+; LMULMAX1-NEXT:    vand.vx v9, v9, a1
+; LMULMAX1-NEXT:    vsub.vv v8, v8, v9
+; LMULMAX1-NEXT:    lui a1, 3
+; LMULMAX1-NEXT:    addi a1, a1, 819
+; LMULMAX1-NEXT:    vand.vx v9, v8, a1
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT:    vand.vx v8, v8, a1
+; LMULMAX1-NEXT:    vadd.vv v8, v9, v8
+; LMULMAX1-NEXT:    vsrl.vi v9, v8, 4
+; LMULMAX1-NEXT:    vadd.vv v8, v8, v9
+; LMULMAX1-NEXT:    lui a1, 1
+; LMULMAX1-NEXT:    addi a1, a1, -241
+; LMULMAX1-NEXT:    vand.vx v8, v8, a1
+; LMULMAX1-NEXT:    li a1, 257
+; LMULMAX1-NEXT:    vmul.vx v8, v8, a1
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
+; LMULMAX1-NEXT:    vse16.v v8, (a0)
+; LMULMAX1-NEXT:    ret
 ;
 ; LMULMAX2-RV32F-LABEL: ctlz_v8i16:
 ; LMULMAX2-RV32F:       # %bb.0:
@@ -349,11 +314,11 @@ define void @ctlz_v4i32(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
 ; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
 ; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
 ; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
@@ -361,10 +326,10 @@ define void @ctlz_v4i32(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
 ; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 257
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 257
 ; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; LMULMAX2-RV64I-NEXT:    vse32.v v8, (a0)
@@ -769,193 +734,99 @@ define void @ctlz_v32i8(ptr %x, ptr %y) nounwind {
 declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)
 
 define void @ctlz_v16i16(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32-LABEL: ctlz_v16i16:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT:    lui a1, 5
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    lui a1, 3
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    lui a1, 1
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    li a1, 257
-; LMULMAX2-RV32-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: ctlz_v16i16:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT:    lui a1, 5
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    lui a1, 3
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    lui a1, 1
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    li a1, 257
-; LMULMAX2-RV64-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: ctlz_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vnot.v v8, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT:    lui a2, 5
-; LMULMAX1-RV32-NEXT:    addi a2, a2, 1365
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    lui a3, 3
-; LMULMAX1-RV32-NEXT:    addi a3, a3, 819
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v8, a3
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    lui a4, 1
-; LMULMAX1-RV32-NEXT:    addi a4, a4, -241
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV32-NEXT:    li a5, 257
-; LMULMAX1-RV32-NEXT:    vmul.vx v8, v8, a5
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 2
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 8
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vnot.v v9, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v9, a3
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT:    vmul.vx v9, v9, a5
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 8
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: ctlz_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vnot.v v8, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT:    lui a2, 5
-; LMULMAX1-RV64-NEXT:    addiw a2, a2, 1365
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    lui a3, 3
-; LMULMAX1-RV64-NEXT:    addiw a3, a3, 819
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v8, a3
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    lui a4, 1
-; LMULMAX1-RV64-NEXT:    addiw a4, a4, -241
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT:    li a5, 257
-; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 2
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 8
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vnot.v v9, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v9, a3
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT:    vmul.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v9, 8
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; LMULMAX2-LABEL: ctlz_v16i16:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; LMULMAX2-NEXT:    vle16.v v8, (a0)
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX2-NEXT:    vor.vv v8, v8, v10
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 2
+; LMULMAX2-NEXT:    vor.vv v8, v8, v10
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX2-NEXT:    vor.vv v8, v8, v10
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 8
+; LMULMAX2-NEXT:    vor.vv v8, v8, v10
+; LMULMAX2-NEXT:    vnot.v v8, v8
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX2-NEXT:    lui a1, 5
+; LMULMAX2-NEXT:    addi a1, a1, 1365
+; LMULMAX2-NEXT:    vand.vx v10, v10, a1
+; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
+; LMULMAX2-NEXT:    lui a1, 3
+; LMULMAX2-NEXT:    addi a1, a1, 819
+; LMULMAX2-NEXT:    vand.vx v10, v8, a1
+; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX2-NEXT:    vand.vx v8, v8, a1
+; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX2-NEXT:    lui a1, 1
+; LMULMAX2-NEXT:    addi a1, a1, -241
+; LMULMAX2-NEXT:    vand.vx v8, v8, a1
+; LMULMAX2-NEXT:    li a1, 257
+; LMULMAX2-NEXT:    vmul.vx v8, v8, a1
+; LMULMAX2-NEXT:    vsrl.vi v8, v8, 8
+; LMULMAX2-NEXT:    vse16.v v8, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: ctlz_v16i16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vle16.v v8, (a1)
+; LMULMAX1-NEXT:    vle16.v v9, (a0)
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX1-NEXT:    vor.vv v8, v8, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 2
+; LMULMAX1-NEXT:    vor.vv v8, v8, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX1-NEXT:    vor.vv v8, v8, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 8
+; LMULMAX1-NEXT:    vor.vv v8, v8, v10
+; LMULMAX1-NEXT:    vnot.v v8, v8
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX1-NEXT:    lui a2, 5
+; LMULMAX1-NEXT:    addi a2, a2, 1365
+; LMULMAX1-NEXT:    vand.vx v10, v10, a2
+; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
+; LMULMAX1-NEXT:    lui a3, 3
+; LMULMAX1-NEXT:    addi a3, a3, 819
+; LMULMAX1-NEXT:    vand.vx v10, v8, a3
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT:    vand.vx v8, v8, a3
+; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX1-NEXT:    lui a4, 1
+; LMULMAX1-NEXT:    addi a4, a4, -241
+; LMULMAX1-NEXT:    vand.vx v8, v8, a4
+; LMULMAX1-NEXT:    li a5, 257
+; LMULMAX1-NEXT:    vmul.vx v8, v8, a5
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
+; LMULMAX1-NEXT:    vor.vv v9, v9, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 2
+; LMULMAX1-NEXT:    vor.vv v9, v9, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
+; LMULMAX1-NEXT:    vor.vv v9, v9, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 8
+; LMULMAX1-NEXT:    vor.vv v9, v9, v10
+; LMULMAX1-NEXT:    vnot.v v9, v9
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
+; LMULMAX1-NEXT:    vand.vx v10, v10, a2
+; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
+; LMULMAX1-NEXT:    vand.vx v10, v9, a3
+; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
+; LMULMAX1-NEXT:    vand.vx v9, v9, a3
+; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
+; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
+; LMULMAX1-NEXT:    vand.vx v9, v9, a4
+; LMULMAX1-NEXT:    vmul.vx v9, v9, a5
+; LMULMAX1-NEXT:    vsrl.vi v9, v9, 8
+; LMULMAX1-NEXT:    vse16.v v9, (a0)
+; LMULMAX1-NEXT:    vse16.v v8, (a1)
+; LMULMAX1-NEXT:    ret
 ;
 ; LMULMAX8-LABEL: ctlz_v16i16:
 ; LMULMAX8:       # %bb.0:
@@ -1041,11 +912,11 @@ define void @ctlz_v8i32(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
 ; LMULMAX2-RV64I-NEXT:    vand.vx v10, v10, a1
 ; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v10
 ; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
 ; LMULMAX2-RV64I-NEXT:    vand.vx v10, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
@@ -1053,10 +924,10 @@ define void @ctlz_v8i32(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 4
 ; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v10
 ; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 257
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 257
 ; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; LMULMAX2-RV64I-NEXT:    vse32.v v8, (a0)
@@ -1461,11 +1332,11 @@ define void @ctlz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 5
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
 ; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
 ; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    lui a1, 3
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
 ; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
@@ -1473,7 +1344,7 @@ define void @ctlz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
 ; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    lui a1, 1
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    li a1, 257
 ; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
@@ -1481,75 +1352,40 @@ define void @ctlz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vse16.v v8, (a0)
 ; LMULMAX2-RV64I-NEXT:    ret
 ;
-; LMULMAX1-RV32-LABEL: ctlz_zero_undef_v8i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vnot.v v8, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT:    lui a1, 5
-; LMULMAX1-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    lui a1, 3
-; LMULMAX1-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    lui a1, 1
-; LMULMAX1-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    li a1, 257
-; LMULMAX1-RV32-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: ctlz_zero_undef_v8i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vnot.v v8, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT:    lui a1, 5
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    lui a1, 3
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, 819
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    lui a1, 1
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    li a1, 257
-; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; LMULMAX1-LABEL: ctlz_zero_undef_v8i16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX1-NEXT:    vle16.v v8, (a0)
+; LMULMAX1-NEXT:    vsrl.vi v9, v8, 1
+; LMULMAX1-NEXT:    vor.vv v8, v8, v9
+; LMULMAX1-NEXT:    vsrl.vi v9, v8, 2
+; LMULMAX1-NEXT:    vor.vv v8, v8, v9
+; LMULMAX1-NEXT:    vsrl.vi v9, v8, 4
+; LMULMAX1-NEXT:    vor.vv v8, v8, v9
+; LMULMAX1-NEXT:    vsrl.vi v9, v8, 8
+; LMULMAX1-NEXT:    vor.vv v8, v8, v9
+; LMULMAX1-NEXT:    vnot.v v8, v8
+; LMULMAX1-NEXT:    vsrl.vi v9, v8, 1
+; LMULMAX1-NEXT:    lui a1, 5
+; LMULMAX1-NEXT:    addi a1, a1, 1365
+; LMULMAX1-NEXT:    vand.vx v9, v9, a1
+; LMULMAX1-NEXT:    vsub.vv v8, v8, v9
+; LMULMAX1-NEXT:    lui a1, 3
+; LMULMAX1-NEXT:    addi a1, a1, 819
+; LMULMAX1-NEXT:    vand.vx v9, v8, a1
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT:    vand.vx v8, v8, a1
+; LMULMAX1-NEXT:    vadd.vv v8, v9, v8
+; LMULMAX1-NEXT:    vsrl.vi v9, v8, 4
+; LMULMAX1-NEXT:    vadd.vv v8, v8, v9
+; LMULMAX1-NEXT:    lui a1, 1
+; LMULMAX1-NEXT:    addi a1, a1, -241
+; LMULMAX1-NEXT:    vand.vx v8, v8, a1
+; LMULMAX1-NEXT:    li a1, 257
+; LMULMAX1-NEXT:    vmul.vx v8, v8, a1
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
+; LMULMAX1-NEXT:    vse16.v v8, (a0)
+; LMULMAX1-NEXT:    ret
 ;
 ; LMULMAX2-RV32F-LABEL: ctlz_zero_undef_v8i16:
 ; LMULMAX2-RV32F:       # %bb.0:
@@ -1676,11 +1512,11 @@ define void @ctlz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
 ; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
 ; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
 ; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
@@ -1688,10 +1524,10 @@ define void @ctlz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
 ; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 257
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 257
 ; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; LMULMAX2-RV64I-NEXT:    vse32.v v8, (a0)
@@ -2069,193 +1905,99 @@ define void @ctlz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
 }
 
 define void @ctlz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32-LABEL: ctlz_zero_undef_v16i16:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT:    lui a1, 5
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    lui a1, 3
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    lui a1, 1
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    li a1, 257
-; LMULMAX2-RV32-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: ctlz_zero_undef_v16i16:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT:    lui a1, 5
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    lui a1, 3
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    lui a1, 1
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    li a1, 257
-; LMULMAX2-RV64-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: ctlz_zero_undef_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vnot.v v8, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT:    lui a2, 5
-; LMULMAX1-RV32-NEXT:    addi a2, a2, 1365
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    lui a3, 3
-; LMULMAX1-RV32-NEXT:    addi a3, a3, 819
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v8, a3
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    lui a4, 1
-; LMULMAX1-RV32-NEXT:    addi a4, a4, -241
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV32-NEXT:    li a5, 257
-; LMULMAX1-RV32-NEXT:    vmul.vx v8, v8, a5
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 2
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 8
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vnot.v v9, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v9, a3
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT:    vmul.vx v9, v9, a5
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 8
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: ctlz_zero_undef_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vnot.v v8, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT:    lui a2, 5
-; LMULMAX1-RV64-NEXT:    addiw a2, a2, 1365
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    lui a3, 3
-; LMULMAX1-RV64-NEXT:    addiw a3, a3, 819
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v8, a3
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    lui a4, 1
-; LMULMAX1-RV64-NEXT:    addiw a4, a4, -241
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT:    li a5, 257
-; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 2
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 8
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vnot.v v9, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v9, a3
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT:    vmul.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v9, 8
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; LMULMAX2-LABEL: ctlz_zero_undef_v16i16:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; LMULMAX2-NEXT:    vle16.v v8, (a0)
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX2-NEXT:    vor.vv v8, v8, v10
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 2
+; LMULMAX2-NEXT:    vor.vv v8, v8, v10
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX2-NEXT:    vor.vv v8, v8, v10
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 8
+; LMULMAX2-NEXT:    vor.vv v8, v8, v10
+; LMULMAX2-NEXT:    vnot.v v8, v8
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX2-NEXT:    lui a1, 5
+; LMULMAX2-NEXT:    addi a1, a1, 1365
+; LMULMAX2-NEXT:    vand.vx v10, v10, a1
+; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
+; LMULMAX2-NEXT:    lui a1, 3
+; LMULMAX2-NEXT:    addi a1, a1, 819
+; LMULMAX2-NEXT:    vand.vx v10, v8, a1
+; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX2-NEXT:    vand.vx v8, v8, a1
+; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX2-NEXT:    lui a1, 1
+; LMULMAX2-NEXT:    addi a1, a1, -241
+; LMULMAX2-NEXT:    vand.vx v8, v8, a1
+; LMULMAX2-NEXT:    li a1, 257
+; LMULMAX2-NEXT:    vmul.vx v8, v8, a1
+; LMULMAX2-NEXT:    vsrl.vi v8, v8, 8
+; LMULMAX2-NEXT:    vse16.v v8, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: ctlz_zero_undef_v16i16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vle16.v v8, (a1)
+; LMULMAX1-NEXT:    vle16.v v9, (a0)
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX1-NEXT:    vor.vv v8, v8, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 2
+; LMULMAX1-NEXT:    vor.vv v8, v8, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX1-NEXT:    vor.vv v8, v8, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 8
+; LMULMAX1-NEXT:    vor.vv v8, v8, v10
+; LMULMAX1-NEXT:    vnot.v v8, v8
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX1-NEXT:    lui a2, 5
+; LMULMAX1-NEXT:    addi a2, a2, 1365
+; LMULMAX1-NEXT:    vand.vx v10, v10, a2
+; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
+; LMULMAX1-NEXT:    lui a3, 3
+; LMULMAX1-NEXT:    addi a3, a3, 819
+; LMULMAX1-NEXT:    vand.vx v10, v8, a3
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT:    vand.vx v8, v8, a3
+; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX1-NEXT:    lui a4, 1
+; LMULMAX1-NEXT:    addi a4, a4, -241
+; LMULMAX1-NEXT:    vand.vx v8, v8, a4
+; LMULMAX1-NEXT:    li a5, 257
+; LMULMAX1-NEXT:    vmul.vx v8, v8, a5
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
+; LMULMAX1-NEXT:    vor.vv v9, v9, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 2
+; LMULMAX1-NEXT:    vor.vv v9, v9, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
+; LMULMAX1-NEXT:    vor.vv v9, v9, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 8
+; LMULMAX1-NEXT:    vor.vv v9, v9, v10
+; LMULMAX1-NEXT:    vnot.v v9, v9
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
+; LMULMAX1-NEXT:    vand.vx v10, v10, a2
+; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
+; LMULMAX1-NEXT:    vand.vx v10, v9, a3
+; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
+; LMULMAX1-NEXT:    vand.vx v9, v9, a3
+; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
+; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
+; LMULMAX1-NEXT:    vand.vx v9, v9, a4
+; LMULMAX1-NEXT:    vmul.vx v9, v9, a5
+; LMULMAX1-NEXT:    vsrl.vi v9, v9, 8
+; LMULMAX1-NEXT:    vse16.v v9, (a0)
+; LMULMAX1-NEXT:    vse16.v v8, (a1)
+; LMULMAX1-NEXT:    ret
 ;
 ; LMULMAX8-LABEL: ctlz_zero_undef_v16i16:
 ; LMULMAX8:       # %bb.0:
@@ -2338,11 +2080,11 @@ define void @ctlz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
 ; LMULMAX2-RV64I-NEXT:    vand.vx v10, v10, a1
 ; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v10
 ; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
 ; LMULMAX2-RV64I-NEXT:    vand.vx v10, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
@@ -2350,10 +2092,10 @@ define void @ctlz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 4
 ; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v10
 ; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 257
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 257
 ; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; LMULMAX2-RV64I-NEXT:    vse32.v v8, (a0)
@@ -2627,3 +2369,8 @@ define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
   store <4 x i64> %c, ptr %x
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; LMULMAX1-RV32: {{.*}}
+; LMULMAX1-RV64: {{.*}}
+; LMULMAX2-RV32: {{.*}}
+; LMULMAX2-RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
index 55485beff8eb1d5..08f7e2058ad29ea 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
@@ -191,105 +191,57 @@ define <16 x i8> @vp_ctpop_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
 declare <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16>, <2 x i1>, i32)
 
 define <2 x i16> @vp_ctpop_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v2i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_v2i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl)
   ret <2 x i16> %v
 }
 
 define <2 x i16> @vp_ctpop_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v2i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_v2i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_v2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <2 x i1> poison, i1 true, i32 0
   %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
   %v = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl)
@@ -299,105 +251,57 @@ define <2 x i16> @vp_ctpop_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
 declare <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16>, <4 x i1>, i32)
 
 define <4 x i16> @vp_ctpop_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v4i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_v4i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i16> %v
 }
 
 define <4 x i16> @vp_ctpop_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v4i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_v4i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_v4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <4 x i1> poison, i1 true, i32 0
   %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
   %v = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
@@ -407,105 +311,57 @@ define <4 x i16> @vp_ctpop_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
 declare <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16>, <8 x i1>, i32)
 
 define <8 x i16> @vp_ctpop_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v8i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_v8i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl)
   ret <8 x i16> %v
 }
 
 define <8 x i16> @vp_ctpop_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v8i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_v8i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_v8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i32 0
   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
   %v = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl)
@@ -515,105 +371,57 @@ define <8 x i16> @vp_ctpop_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
 declare <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16>, <16 x i1>, i32)
 
 define <16 x i16> @vp_ctpop_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v16i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_v16i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl)
   ret <16 x i16> %v
 }
 
 define <16 x i16> @vp_ctpop_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v16i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_v16i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_v16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
   %v = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl)
@@ -623,109 +431,59 @@ define <16 x i16> @vp_ctpop_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
 declare <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32>, <2 x i1>, i32)
 
 define <2 x i32> @vp_ctpop_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v2i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_v2i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
   %v = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
   ret <2 x i32> %v
 }
 
 define <2 x i32> @vp_ctpop_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v2i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_v2i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_v2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
   %head = insertelement <2 x i1> poison, i1 true, i32 0
   %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
   %v = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
@@ -735,109 +493,59 @@ define <2 x i32> @vp_ctpop_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
 declare <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32>, <4 x i1>, i32)
 
 define <4 x i32> @vp_ctpop_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_v4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
   %v = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
   ret <4 x i32> %v
 }
 
 define <4 x i32> @vp_ctpop_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v4i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_v4i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_v4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
   %head = insertelement <4 x i1> poison, i1 true, i32 0
   %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
   %v = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
@@ -847,109 +555,59 @@ define <4 x i32> @vp_ctpop_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
 declare <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32>, <8 x i1>, i32)
 
 define <8 x i32> @vp_ctpop_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v8i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_v8i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
   %v = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
   ret <8 x i32> %v
 }
 
 define <8 x i32> @vp_ctpop_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v8i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_v8i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_v8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i32 0
   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
   %v = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
@@ -959,109 +617,59 @@ define <8 x i32> @vp_ctpop_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
 declare <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32>, <16 x i1>, i32)
 
 define <16 x i32> @vp_ctpop_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_v16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
   %v = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
   ret <16 x i32> %v
 }
 
 define <16 x i32> @vp_ctpop_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v16i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_ctpop_v16i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsub.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_ctpop_v16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
   %v = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
index c7b6db226ee5f6e..0b2a44fa14526be 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
@@ -42,109 +42,31 @@ define void @ctpop_v16i8(ptr %x, ptr %y) {
 declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
 
 define void @ctpop_v8i16(ptr %x, ptr %y) {
-; LMULMAX2-RV32-LABEL: ctpop_v8i16:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32-NEXT:    lui a1, 5
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    lui a1, 3
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    lui a1, 1
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    li a1, 257
-; LMULMAX2-RV32-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: ctpop_v8i16:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64-NEXT:    lui a1, 5
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT:    lui a1, 3
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT:    lui a1, 1
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    li a1, 257
-; LMULMAX2-RV64-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: ctpop_v8i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT:    lui a1, 5
-; LMULMAX1-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    lui a1, 3
-; LMULMAX1-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    lui a1, 1
-; LMULMAX1-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    li a1, 257
-; LMULMAX1-RV32-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: ctpop_v8i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT:    lui a1, 5
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    lui a1, 3
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, 819
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    lui a1, 1
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    li a1, 257
-; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: ctpop_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a1, 5
+; CHECK-NEXT:    addi a1, a1, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a1, 3
+; CHECK-NEXT:    addi a1, a1, 819
+; CHECK-NEXT:    vand.vx v9, v8, a1
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a1, 1
+; CHECK-NEXT:    addi a1, a1, -241
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    li a1, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a1
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctpop_v8i16:
 ; ZVBB:       # %bb.0:
@@ -162,113 +84,32 @@ define void @ctpop_v8i16(ptr %x, ptr %y) {
 declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
 
 define void @ctpop_v4i32(ptr %x, ptr %y) {
-; LMULMAX2-RV32-LABEL: ctpop_v4i32:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32-NEXT:    lui a1, 349525
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    lui a1, 209715
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    lui a1, 61681
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    lui a1, 4112
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: ctpop_v4i32:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64-NEXT:    lui a1, 349525
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT:    lui a1, 209715
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT:    lui a1, 61681
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    lui a1, 4112
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 257
-; LMULMAX2-RV64-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: ctpop_v4i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT:    lui a1, 349525
-; LMULMAX1-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    lui a1, 209715
-; LMULMAX1-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    lui a1, 61681
-; LMULMAX1-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    lui a1, 4112
-; LMULMAX1-RV32-NEXT:    addi a1, a1, 257
-; LMULMAX1-RV32-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: ctpop_v4i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT:    lui a1, 349525
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    lui a1, 209715
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, 819
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    lui a1, 61681
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    lui a1, 4112
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, 257
-; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: ctpop_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a1, 349525
+; CHECK-NEXT:    addi a1, a1, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a1, 209715
+; CHECK-NEXT:    addi a1, a1, 819
+; CHECK-NEXT:    vand.vx v9, v8, a1
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a1, 61681
+; CHECK-NEXT:    addi a1, a1, -241
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    lui a1, 4112
+; CHECK-NEXT:    addi a1, a1, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a1
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctpop_v4i32:
 ; ZVBB:       # %bb.0:
@@ -523,139 +364,72 @@ define void @ctpop_v32i8(ptr %x, ptr %y) {
 declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
 
 define void @ctpop_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-RV32-LABEL: ctpop_v16i16:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT:    lui a1, 5
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    lui a1, 3
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    lui a1, 1
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    li a1, 257
-; LMULMAX2-RV32-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: ctpop_v16i16:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT:    lui a1, 5
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    lui a1, 3
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    lui a1, 1
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    li a1, 257
-; LMULMAX2-RV64-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: ctpop_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT:    lui a2, 5
-; LMULMAX1-RV32-NEXT:    addi a2, a2, 1365
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    lui a3, 3
-; LMULMAX1-RV32-NEXT:    addi a3, a3, 819
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v8, a3
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    lui a4, 1
-; LMULMAX1-RV32-NEXT:    addi a4, a4, -241
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV32-NEXT:    li a5, 257
-; LMULMAX1-RV32-NEXT:    vmul.vx v8, v8, a5
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v9, a3
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT:    vmul.vx v9, v9, a5
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 8
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    ret
+; LMULMAX2-LABEL: ctpop_v16i16:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; LMULMAX2-NEXT:    vle16.v v8, (a0)
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX2-NEXT:    lui a1, 5
+; LMULMAX2-NEXT:    addi a1, a1, 1365
+; LMULMAX2-NEXT:    vand.vx v10, v10, a1
+; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
+; LMULMAX2-NEXT:    lui a1, 3
+; LMULMAX2-NEXT:    addi a1, a1, 819
+; LMULMAX2-NEXT:    vand.vx v10, v8, a1
+; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX2-NEXT:    vand.vx v8, v8, a1
+; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX2-NEXT:    lui a1, 1
+; LMULMAX2-NEXT:    addi a1, a1, -241
+; LMULMAX2-NEXT:    vand.vx v8, v8, a1
+; LMULMAX2-NEXT:    li a1, 257
+; LMULMAX2-NEXT:    vmul.vx v8, v8, a1
+; LMULMAX2-NEXT:    vsrl.vi v8, v8, 8
+; LMULMAX2-NEXT:    vse16.v v8, (a0)
+; LMULMAX2-NEXT:    ret
 ;
-; LMULMAX1-RV64-LABEL: ctpop_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT:    lui a2, 5
-; LMULMAX1-RV64-NEXT:    addiw a2, a2, 1365
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    lui a3, 3
-; LMULMAX1-RV64-NEXT:    addiw a3, a3, 819
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v8, a3
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    lui a4, 1
-; LMULMAX1-RV64-NEXT:    addiw a4, a4, -241
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT:    li a5, 257
-; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v9, a3
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT:    vmul.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v9, 8
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; LMULMAX1-LABEL: ctpop_v16i16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vle16.v v8, (a1)
+; LMULMAX1-NEXT:    vle16.v v9, (a0)
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX1-NEXT:    lui a2, 5
+; LMULMAX1-NEXT:    addi a2, a2, 1365
+; LMULMAX1-NEXT:    vand.vx v10, v10, a2
+; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
+; LMULMAX1-NEXT:    lui a3, 3
+; LMULMAX1-NEXT:    addi a3, a3, 819
+; LMULMAX1-NEXT:    vand.vx v10, v8, a3
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT:    vand.vx v8, v8, a3
+; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX1-NEXT:    lui a4, 1
+; LMULMAX1-NEXT:    addi a4, a4, -241
+; LMULMAX1-NEXT:    vand.vx v8, v8, a4
+; LMULMAX1-NEXT:    li a5, 257
+; LMULMAX1-NEXT:    vmul.vx v8, v8, a5
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
+; LMULMAX1-NEXT:    vand.vx v10, v10, a2
+; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
+; LMULMAX1-NEXT:    vand.vx v10, v9, a3
+; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
+; LMULMAX1-NEXT:    vand.vx v9, v9, a3
+; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
+; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
+; LMULMAX1-NEXT:    vand.vx v9, v9, a4
+; LMULMAX1-NEXT:    vmul.vx v9, v9, a5
+; LMULMAX1-NEXT:    vsrl.vi v9, v9, 8
+; LMULMAX1-NEXT:    vse16.v v9, (a0)
+; LMULMAX1-NEXT:    vse16.v v8, (a1)
+; LMULMAX1-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctpop_v16i16:
 ; ZVBB:       # %bb.0:
@@ -673,143 +447,74 @@ define void @ctpop_v16i16(ptr %x, ptr %y) {
 declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
 
 define void @ctpop_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-RV32-LABEL: ctpop_v8i32:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT:    lui a1, 349525
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    lui a1, 209715
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    lui a1, 61681
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    lui a1, 4112
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: ctpop_v8i32:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT:    lui a1, 349525
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    lui a1, 209715
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    lui a1, 61681
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    lui a1, 4112
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 257
-; LMULMAX2-RV64-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: ctpop_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT:    lui a2, 349525
-; LMULMAX1-RV32-NEXT:    addi a2, a2, 1365
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    lui a3, 209715
-; LMULMAX1-RV32-NEXT:    addi a3, a3, 819
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v8, a3
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    lui a4, 61681
-; LMULMAX1-RV32-NEXT:    addi a4, a4, -241
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV32-NEXT:    lui a5, 4112
-; LMULMAX1-RV32-NEXT:    addi a5, a5, 257
-; LMULMAX1-RV32-NEXT:    vmul.vx v8, v8, a5
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v9, a3
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT:    vmul.vx v9, v9, a5
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 24
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    ret
+; LMULMAX2-LABEL: ctpop_v8i32:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-NEXT:    vle32.v v8, (a0)
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX2-NEXT:    lui a1, 349525
+; LMULMAX2-NEXT:    addi a1, a1, 1365
+; LMULMAX2-NEXT:    vand.vx v10, v10, a1
+; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
+; LMULMAX2-NEXT:    lui a1, 209715
+; LMULMAX2-NEXT:    addi a1, a1, 819
+; LMULMAX2-NEXT:    vand.vx v10, v8, a1
+; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX2-NEXT:    vand.vx v8, v8, a1
+; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX2-NEXT:    lui a1, 61681
+; LMULMAX2-NEXT:    addi a1, a1, -241
+; LMULMAX2-NEXT:    vand.vx v8, v8, a1
+; LMULMAX2-NEXT:    lui a1, 4112
+; LMULMAX2-NEXT:    addi a1, a1, 257
+; LMULMAX2-NEXT:    vmul.vx v8, v8, a1
+; LMULMAX2-NEXT:    vsrl.vi v8, v8, 24
+; LMULMAX2-NEXT:    vse32.v v8, (a0)
+; LMULMAX2-NEXT:    ret
 ;
-; LMULMAX1-RV64-LABEL: ctpop_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT:    lui a2, 349525
-; LMULMAX1-RV64-NEXT:    addiw a2, a2, 1365
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    lui a3, 209715
-; LMULMAX1-RV64-NEXT:    addiw a3, a3, 819
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v8, a3
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    lui a4, 61681
-; LMULMAX1-RV64-NEXT:    addiw a4, a4, -241
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT:    lui a5, 4112
-; LMULMAX1-RV64-NEXT:    addiw a5, a5, 257
-; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v9, a3
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT:    vmul.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v9, 24
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; LMULMAX1-LABEL: ctpop_v8i32:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vle32.v v8, (a1)
+; LMULMAX1-NEXT:    vle32.v v9, (a0)
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX1-NEXT:    lui a2, 349525
+; LMULMAX1-NEXT:    addi a2, a2, 1365
+; LMULMAX1-NEXT:    vand.vx v10, v10, a2
+; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
+; LMULMAX1-NEXT:    lui a3, 209715
+; LMULMAX1-NEXT:    addi a3, a3, 819
+; LMULMAX1-NEXT:    vand.vx v10, v8, a3
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT:    vand.vx v8, v8, a3
+; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX1-NEXT:    lui a4, 61681
+; LMULMAX1-NEXT:    addi a4, a4, -241
+; LMULMAX1-NEXT:    vand.vx v8, v8, a4
+; LMULMAX1-NEXT:    lui a5, 4112
+; LMULMAX1-NEXT:    addi a5, a5, 257
+; LMULMAX1-NEXT:    vmul.vx v8, v8, a5
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 24
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
+; LMULMAX1-NEXT:    vand.vx v10, v10, a2
+; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
+; LMULMAX1-NEXT:    vand.vx v10, v9, a3
+; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
+; LMULMAX1-NEXT:    vand.vx v9, v9, a3
+; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
+; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
+; LMULMAX1-NEXT:    vand.vx v9, v9, a4
+; LMULMAX1-NEXT:    vmul.vx v9, v9, a5
+; LMULMAX1-NEXT:    vsrl.vi v9, v9, 24
+; LMULMAX1-NEXT:    vse32.v v9, (a0)
+; LMULMAX1-NEXT:    vse32.v v8, (a1)
+; LMULMAX1-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctpop_v8i32:
 ; ZVBB:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
index 28df7f083c4a03e..345e4180bba31a6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
@@ -223,121 +223,65 @@ define <16 x i8> @vp_cttz_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
 declare <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)
 
 define <2 x i16> @vp_cttz_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v2i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_v2i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 false, <2 x i1> %m, i32 %evl)
   ret <2 x i16> %v
 }
 
 define <2 x i16> @vp_cttz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v2i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_v2i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_v2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <2 x i1> poison, i1 true, i32 0
   %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
   %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 false, <2 x i1> %m, i32 %evl)
@@ -347,121 +291,65 @@ define <2 x i16> @vp_cttz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
 declare <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)
 
 define <4 x i16> @vp_cttz_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v4i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_v4i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 false, <4 x i1> %m, i32 %evl)
   ret <4 x i16> %v
 }
 
 define <4 x i16> @vp_cttz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v4i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_v4i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_v4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <4 x i1> poison, i1 true, i32 0
   %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
   %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 false, <4 x i1> %m, i32 %evl)
@@ -471,121 +359,65 @@ define <4 x i16> @vp_cttz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
 declare <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)
 
 define <8 x i16> @vp_cttz_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v8i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_v8i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 false, <8 x i1> %m, i32 %evl)
   ret <8 x i16> %v
 }
 
 define <8 x i16> @vp_cttz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v8i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_v8i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_v8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i32 0
   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
   %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 false, <8 x i1> %m, i32 %evl)
@@ -595,121 +427,65 @@ define <8 x i16> @vp_cttz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
 declare <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)
 
 define <16 x i16> @vp_cttz_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v16i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_v16i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsub.vx v10, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 false, <16 x i1> %m, i32 %evl)
   ret <16 x i16> %v
 }
 
 define <16 x i16> @vp_cttz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v16i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_v16i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_v16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsub.vx v10, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
   %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 false, <16 x i1> %m, i32 %evl)
@@ -719,125 +495,67 @@ define <16 x i16> @vp_cttz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
 declare <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)
 
 define <2 x i32> @vp_cttz_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v2i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_v2i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
   %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 false, <2 x i1> %m, i32 %evl)
   ret <2 x i32> %v
 }
 
 define <2 x i32> @vp_cttz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v2i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_v2i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_v2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
   %head = insertelement <2 x i1> poison, i1 true, i32 0
   %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
   %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 false, <2 x i1> %m, i32 %evl)
@@ -847,125 +565,67 @@ define <2 x i32> @vp_cttz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
 declare <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)
 
 define <4 x i32> @vp_cttz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_v4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
-  %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl)
+; CHECK-LABEL: vp_cttz_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl)
   ret <4 x i32> %v
 }
 
 define <4 x i32> @vp_cttz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v4i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_v4i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_v4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
   %head = insertelement <4 x i1> poison, i1 true, i32 0
   %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
   %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl)
@@ -975,125 +635,67 @@ define <4 x i32> @vp_cttz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
 declare <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)
 
 define <8 x i32> @vp_cttz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v8i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_v8i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsub.vx v10, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
   %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl)
   ret <8 x i32> %v
 }
 
 define <8 x i32> @vp_cttz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v8i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_v8i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_v8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsub.vx v10, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i32 0
   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
   %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl)
@@ -1103,125 +705,67 @@ define <8 x i32> @vp_cttz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
 declare <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)
 
 define <16 x i32> @vp_cttz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsub.vx v12, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_v16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsub.vx v12, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsub.vx v12, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
   %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl)
   ret <16 x i32> %v
 }
 
 define <16 x i32> @vp_cttz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v16i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsub.vx v12, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_v16i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsub.vx v12, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsub.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_v16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsub.vx v12, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v12
+; CHECK-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
   %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl)
@@ -2894,121 +2438,65 @@ define <16 x i8> @vp_cttz_zero_undef_v16i8_unmasked(<16 x i8> %va, i32 zeroext %
 }
 
 define <2 x i16> @vp_cttz_zero_undef_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v2i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v2i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_zero_undef_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 true, <2 x i1> %m, i32 %evl)
   ret <2 x i16> %v
 }
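(The e16 tests above use the same expansion with 16-bit masks and an 8-bit final shift. The lui/addi immediates decompose as in this small standalone check — illustrative only, relying on lui placing its immediate at bit 12:)

```c
#include <assert.h>

int main(void) {
  /* lui shifts its immediate left by 12; addi then adds a signed 12-bit constant. */
  assert((5 << 12) + 1365 == 0x5555); /* mask for the 2-bit step */
  assert((3 << 12) + 819  == 0x3333); /* mask for the 4-bit step */
  assert((1 << 12) - 241  == 0x0F0F); /* mask for the 8-bit step */
  assert(257 == 0x0101);              /* multiplier; count ends up in bits 15:8 */
  return 0;
}
```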
 
 define <2 x i16> @vp_cttz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v2i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v2i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_zero_undef_v2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <2 x i1> poison, i1 true, i32 0
   %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
   %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 true, <2 x i1> %m, i32 %evl)
@@ -3016,121 +2504,65 @@ define <2 x i16> @vp_cttz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext %
 }
 
 define <4 x i16> @vp_cttz_zero_undef_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v4i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v4i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_zero_undef_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 true, <4 x i1> %m, i32 %evl)
   ret <4 x i16> %v
 }
 
 define <4 x i16> @vp_cttz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v4i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v4i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_zero_undef_v4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <4 x i1> poison, i1 true, i32 0
   %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
   %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 true, <4 x i1> %m, i32 %evl)
@@ -3138,121 +2570,65 @@ define <4 x i16> @vp_cttz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext %
 }
 
 define <8 x i16> @vp_cttz_zero_undef_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v8i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v8i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_zero_undef_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 true, <8 x i1> %m, i32 %evl)
   ret <8 x i16> %v
 }
 
 define <8 x i16> @vp_cttz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v8i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v8i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_zero_undef_v8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i32 0
   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
   %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 true, <8 x i1> %m, i32 %evl)
@@ -3260,121 +2636,65 @@ define <8 x i16> @vp_cttz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %
 }
 
 define <16 x i16> @vp_cttz_zero_undef_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v16i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v16i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_zero_undef_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsub.vx v10, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT:    ret
   %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 true, <16 x i1> %m, i32 %evl)
   ret <16 x i16> %v
 }
 
 define <16 x i16> @vp_cttz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v16i16_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 5
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 3
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 1
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    li a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v16i16_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 5
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 1
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    li a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 8
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_zero_undef_v16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsub.vx v10, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 3
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 1
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
   %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 true, <16 x i1> %m, i32 %evl)
@@ -3382,251 +2702,135 @@ define <16 x i16> @vp_cttz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroex
 }
 
 define <2 x i32> @vp_cttz_zero_undef_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v2i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v2i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_zero_undef_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
   %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl)
   ret <2 x i32> %v
 }
 
 define <2 x i32> @vp_cttz_zero_undef_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v2i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v2i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
-  %head = insertelement <2 x i1> poison, i1 true, i32 0
-  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
-  %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl)
-  ret <2 x i32> %v
-}
-
-define <4 x i32> @vp_cttz_zero_undef_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_zero_undef_v2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <4 x i32> @vp_cttz_zero_undef_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
   %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 true, <4 x i1> %m, i32 %evl)
   ret <4 x i32> %v
 }
 
 define <4 x i32> @vp_cttz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v4i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v9, v9, a0
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v4i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_zero_undef_v4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsub.vx v9, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
   %head = insertelement <4 x i1> poison, i1 true, i32 0
   %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
   %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 true, <4 x i1> %m, i32 %evl)
@@ -3634,125 +2838,67 @@ define <4 x i32> @vp_cttz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %
 }
 
 define <8 x i32> @vp_cttz_zero_undef_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v8i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v8i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_zero_undef_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsub.vx v10, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
   %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 true, <8 x i1> %m, i32 %evl)
   ret <8 x i32> %v
 }
 
 define <8 x i32> @vp_cttz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v8i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v10, v10, a0
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v10, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v8i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_zero_undef_v8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsub.vx v10, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v10, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i32 0
   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
   %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 true, <8 x i1> %m, i32 %evl)
@@ -3760,125 +2906,67 @@ define <8 x i32> @vp_cttz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext %
 }
 
 define <16 x i32> @vp_cttz_zero_undef_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsub.vx v12, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsub.vx v12, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_zero_undef_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsub.vx v12, v8, a1, v0.t
+; CHECK-NEXT:    vnot.v v8, v8, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT:    ret
   %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 true, <16 x i1> %m, i32 %evl)
   ret <16 x i32> %v
 }
 
 define <16 x i32> @vp_cttz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v16i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT:    vsub.vx v12, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v12, v12, a0
-; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v12, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v16i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT:    vsub.vx v12, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsub.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v12, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_zero_undef_v16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsub.vx v12, v8, a1
+; CHECK-NEXT:    vnot.v v8, v8
+; CHECK-NEXT:    vand.vv v8, v8, v12
+; CHECK-NEXT:    vsrl.vi v12, v8, 1
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    vand.vx v12, v12, a0
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    vand.vx v12, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    vadd.vv v8, v12, v8
+; CHECK-NEXT:    vsrl.vi v12, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 4112
+; CHECK-NEXT:    addi a0, a0, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
   %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 true, <16 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
index 83774704f9ca911..1b422730f2ac7b1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -113,11 +113,11 @@ define void @cttz_v8i16(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vand.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 5
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
 ; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
 ; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    lui a1, 3
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
 ; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
@@ -125,7 +125,7 @@ define void @cttz_v8i16(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
 ; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    lui a1, 1
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    li a1, 257
 ; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
@@ -133,65 +133,35 @@ define void @cttz_v8i16(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vse16.v v8, (a0)
 ; LMULMAX2-RV64I-NEXT:    ret
 ;
-; LMULMAX1-RV32-LABEL: cttz_v8i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    li a1, 1
-; LMULMAX1-RV32-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT:    vnot.v v8, v8
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT:    lui a1, 5
-; LMULMAX1-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    lui a1, 3
-; LMULMAX1-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    lui a1, 1
-; LMULMAX1-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    li a1, 257
-; LMULMAX1-RV32-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: cttz_v8i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    li a1, 1
-; LMULMAX1-RV64-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT:    vnot.v v8, v8
-; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT:    lui a1, 5
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    lui a1, 3
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, 819
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    lui a1, 1
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    li a1, 257
-; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; LMULMAX1-LABEL: cttz_v8i16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX1-NEXT:    vle16.v v8, (a0)
+; LMULMAX1-NEXT:    li a1, 1
+; LMULMAX1-NEXT:    vsub.vx v9, v8, a1
+; LMULMAX1-NEXT:    vnot.v v8, v8
+; LMULMAX1-NEXT:    vand.vv v8, v8, v9
+; LMULMAX1-NEXT:    vsrl.vi v9, v8, 1
+; LMULMAX1-NEXT:    lui a1, 5
+; LMULMAX1-NEXT:    addi a1, a1, 1365
+; LMULMAX1-NEXT:    vand.vx v9, v9, a1
+; LMULMAX1-NEXT:    vsub.vv v8, v8, v9
+; LMULMAX1-NEXT:    lui a1, 3
+; LMULMAX1-NEXT:    addi a1, a1, 819
+; LMULMAX1-NEXT:    vand.vx v9, v8, a1
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT:    vand.vx v8, v8, a1
+; LMULMAX1-NEXT:    vadd.vv v8, v9, v8
+; LMULMAX1-NEXT:    vsrl.vi v9, v8, 4
+; LMULMAX1-NEXT:    vadd.vv v8, v8, v9
+; LMULMAX1-NEXT:    lui a1, 1
+; LMULMAX1-NEXT:    addi a1, a1, -241
+; LMULMAX1-NEXT:    vand.vx v8, v8, a1
+; LMULMAX1-NEXT:    li a1, 257
+; LMULMAX1-NEXT:    vmul.vx v8, v8, a1
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
+; LMULMAX1-NEXT:    vse16.v v8, (a0)
+; LMULMAX1-NEXT:    ret
 ;
 ; LMULMAX2-RV32F-LABEL: cttz_v8i16:
 ; LMULMAX2-RV32F:       # %bb.0:
@@ -330,11 +300,11 @@ define void @cttz_v4i32(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vand.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
 ; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
 ; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
 ; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
@@ -342,10 +312,10 @@ define void @cttz_v4i32(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
 ; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 257
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 257
 ; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; LMULMAX2-RV64I-NEXT:    vse32.v v8, (a0)
@@ -752,161 +722,83 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind {
 declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)
 
 define void @cttz_v16i16(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32-LABEL: cttz_v16i16:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    li a1, 1
-; LMULMAX2-RV32-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT:    lui a1, 5
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    lui a1, 3
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    lui a1, 1
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    li a1, 257
-; LMULMAX2-RV32-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: cttz_v16i16:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    li a1, 1
-; LMULMAX2-RV64-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT:    lui a1, 5
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    lui a1, 3
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    lui a1, 1
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    li a1, 257
-; LMULMAX2-RV64-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: cttz_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    li a2, 1
-; LMULMAX1-RV32-NEXT:    vsub.vx v10, v8, a2
-; LMULMAX1-RV32-NEXT:    vnot.v v8, v8
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT:    lui a3, 5
-; LMULMAX1-RV32-NEXT:    addi a3, a3, 1365
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    lui a4, 3
-; LMULMAX1-RV32-NEXT:    addi a4, a4, 819
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v8, a4
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    lui a5, 1
-; LMULMAX1-RV32-NEXT:    addi a5, a5, -241
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a5
-; LMULMAX1-RV32-NEXT:    li a6, 257
-; LMULMAX1-RV32-NEXT:    vmul.vx v8, v8, a6
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT:    vsub.vx v10, v9, a2
-; LMULMAX1-RV32-NEXT:    vnot.v v9, v9
-; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v9, a4
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a5
-; LMULMAX1-RV32-NEXT:    vmul.vx v9, v9, a6
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 8
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: cttz_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    li a2, 1
-; LMULMAX1-RV64-NEXT:    vsub.vx v10, v8, a2
-; LMULMAX1-RV64-NEXT:    vnot.v v8, v8
-; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT:    lui a3, 5
-; LMULMAX1-RV64-NEXT:    addiw a3, a3, 1365
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    lui a4, 3
-; LMULMAX1-RV64-NEXT:    addiw a4, a4, 819
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v8, a4
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    lui a5, 1
-; LMULMAX1-RV64-NEXT:    addiw a5, a5, -241
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT:    li a6, 257
-; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a6
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT:    vsub.vx v10, v9, a2
-; LMULMAX1-RV64-NEXT:    vnot.v v9, v9
-; LMULMAX1-RV64-NEXT:    vand.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v9, a4
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT:    vmul.vx v9, v9, a6
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v9, 8
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; LMULMAX2-LABEL: cttz_v16i16:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; LMULMAX2-NEXT:    vle16.v v8, (a0)
+; LMULMAX2-NEXT:    li a1, 1
+; LMULMAX2-NEXT:    vsub.vx v10, v8, a1
+; LMULMAX2-NEXT:    vnot.v v8, v8
+; LMULMAX2-NEXT:    vand.vv v8, v8, v10
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX2-NEXT:    lui a1, 5
+; LMULMAX2-NEXT:    addi a1, a1, 1365
+; LMULMAX2-NEXT:    vand.vx v10, v10, a1
+; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
+; LMULMAX2-NEXT:    lui a1, 3
+; LMULMAX2-NEXT:    addi a1, a1, 819
+; LMULMAX2-NEXT:    vand.vx v10, v8, a1
+; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX2-NEXT:    vand.vx v8, v8, a1
+; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX2-NEXT:    lui a1, 1
+; LMULMAX2-NEXT:    addi a1, a1, -241
+; LMULMAX2-NEXT:    vand.vx v8, v8, a1
+; LMULMAX2-NEXT:    li a1, 257
+; LMULMAX2-NEXT:    vmul.vx v8, v8, a1
+; LMULMAX2-NEXT:    vsrl.vi v8, v8, 8
+; LMULMAX2-NEXT:    vse16.v v8, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: cttz_v16i16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vle16.v v8, (a1)
+; LMULMAX1-NEXT:    vle16.v v9, (a0)
+; LMULMAX1-NEXT:    li a2, 1
+; LMULMAX1-NEXT:    vsub.vx v10, v8, a2
+; LMULMAX1-NEXT:    vnot.v v8, v8
+; LMULMAX1-NEXT:    vand.vv v8, v8, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX1-NEXT:    lui a3, 5
+; LMULMAX1-NEXT:    addi a3, a3, 1365
+; LMULMAX1-NEXT:    vand.vx v10, v10, a3
+; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
+; LMULMAX1-NEXT:    lui a4, 3
+; LMULMAX1-NEXT:    addi a4, a4, 819
+; LMULMAX1-NEXT:    vand.vx v10, v8, a4
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT:    vand.vx v8, v8, a4
+; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX1-NEXT:    lui a5, 1
+; LMULMAX1-NEXT:    addi a5, a5, -241
+; LMULMAX1-NEXT:    vand.vx v8, v8, a5
+; LMULMAX1-NEXT:    li a6, 257
+; LMULMAX1-NEXT:    vmul.vx v8, v8, a6
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
+; LMULMAX1-NEXT:    vsub.vx v10, v9, a2
+; LMULMAX1-NEXT:    vnot.v v9, v9
+; LMULMAX1-NEXT:    vand.vv v9, v9, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
+; LMULMAX1-NEXT:    vand.vx v10, v10, a3
+; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
+; LMULMAX1-NEXT:    vand.vx v10, v9, a4
+; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
+; LMULMAX1-NEXT:    vand.vx v9, v9, a4
+; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
+; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
+; LMULMAX1-NEXT:    vand.vx v9, v9, a5
+; LMULMAX1-NEXT:    vmul.vx v9, v9, a6
+; LMULMAX1-NEXT:    vsrl.vi v9, v9, 8
+; LMULMAX1-NEXT:    vse16.v v9, (a0)
+; LMULMAX1-NEXT:    vse16.v v8, (a1)
+; LMULMAX1-NEXT:    ret
 ;
 ; LMULMAX8-LABEL: cttz_v16i16:
 ; LMULMAX8:       # %bb.0:
@@ -981,11 +873,11 @@ define void @cttz_v8i32(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vand.vv v8, v8, v10
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
 ; LMULMAX2-RV64I-NEXT:    vand.vx v10, v10, a1
 ; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v10
 ; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
 ; LMULMAX2-RV64I-NEXT:    vand.vx v10, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
@@ -993,10 +885,10 @@ define void @cttz_v8i32(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 4
 ; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v10
 ; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 257
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 257
 ; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; LMULMAX2-RV64I-NEXT:    vse32.v v8, (a0)
@@ -1400,11 +1292,11 @@ define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vand.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 5
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
 ; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
 ; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    lui a1, 3
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
 ; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
@@ -1412,7 +1304,7 @@ define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
 ; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    lui a1, 1
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    li a1, 257
 ; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
@@ -1420,65 +1312,35 @@ define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vse16.v v8, (a0)
 ; LMULMAX2-RV64I-NEXT:    ret
 ;
-; LMULMAX1-RV32-LABEL: cttz_zero_undef_v8i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    li a1, 1
-; LMULMAX1-RV32-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT:    vnot.v v8, v8
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT:    lui a1, 5
-; LMULMAX1-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    lui a1, 3
-; LMULMAX1-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    lui a1, 1
-; LMULMAX1-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    li a1, 257
-; LMULMAX1-RV32-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: cttz_zero_undef_v8i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    li a1, 1
-; LMULMAX1-RV64-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT:    vnot.v v8, v8
-; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT:    lui a1, 5
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    lui a1, 3
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, 819
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    lui a1, 1
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    li a1, 257
-; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; LMULMAX1-LABEL: cttz_zero_undef_v8i16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX1-NEXT:    vle16.v v8, (a0)
+; LMULMAX1-NEXT:    li a1, 1
+; LMULMAX1-NEXT:    vsub.vx v9, v8, a1
+; LMULMAX1-NEXT:    vnot.v v8, v8
+; LMULMAX1-NEXT:    vand.vv v8, v8, v9
+; LMULMAX1-NEXT:    vsrl.vi v9, v8, 1
+; LMULMAX1-NEXT:    lui a1, 5
+; LMULMAX1-NEXT:    addi a1, a1, 1365
+; LMULMAX1-NEXT:    vand.vx v9, v9, a1
+; LMULMAX1-NEXT:    vsub.vv v8, v8, v9
+; LMULMAX1-NEXT:    lui a1, 3
+; LMULMAX1-NEXT:    addi a1, a1, 819
+; LMULMAX1-NEXT:    vand.vx v9, v8, a1
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT:    vand.vx v8, v8, a1
+; LMULMAX1-NEXT:    vadd.vv v8, v9, v8
+; LMULMAX1-NEXT:    vsrl.vi v9, v8, 4
+; LMULMAX1-NEXT:    vadd.vv v8, v8, v9
+; LMULMAX1-NEXT:    lui a1, 1
+; LMULMAX1-NEXT:    addi a1, a1, -241
+; LMULMAX1-NEXT:    vand.vx v8, v8, a1
+; LMULMAX1-NEXT:    li a1, 257
+; LMULMAX1-NEXT:    vmul.vx v8, v8, a1
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
+; LMULMAX1-NEXT:    vse16.v v8, (a0)
+; LMULMAX1-NEXT:    ret
 ;
 ; LMULMAX2-RV32F-LABEL: cttz_zero_undef_v8i16:
 ; LMULMAX2-RV32F:       # %bb.0:
@@ -1601,11 +1463,11 @@ define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vand.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
 ; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
 ; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
 ; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
@@ -1613,10 +1475,10 @@ define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
 ; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
 ; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 257
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 257
 ; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; LMULMAX2-RV64I-NEXT:    vse32.v v8, (a0)
@@ -1987,161 +1849,83 @@ define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
 }
 
 define void @cttz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32-LABEL: cttz_zero_undef_v16i16:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    li a1, 1
-; LMULMAX2-RV32-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT:    lui a1, 5
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    lui a1, 3
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    lui a1, 1
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    li a1, 257
-; LMULMAX2-RV32-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: cttz_zero_undef_v16i16:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    li a1, 1
-; LMULMAX2-RV64-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT:    lui a1, 5
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    lui a1, 3
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    lui a1, 1
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    li a1, 257
-; LMULMAX2-RV64-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: cttz_zero_undef_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    li a2, 1
-; LMULMAX1-RV32-NEXT:    vsub.vx v10, v8, a2
-; LMULMAX1-RV32-NEXT:    vnot.v v8, v8
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT:    lui a3, 5
-; LMULMAX1-RV32-NEXT:    addi a3, a3, 1365
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    lui a4, 3
-; LMULMAX1-RV32-NEXT:    addi a4, a4, 819
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v8, a4
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    lui a5, 1
-; LMULMAX1-RV32-NEXT:    addi a5, a5, -241
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a5
-; LMULMAX1-RV32-NEXT:    li a6, 257
-; LMULMAX1-RV32-NEXT:    vmul.vx v8, v8, a6
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT:    vsub.vx v10, v9, a2
-; LMULMAX1-RV32-NEXT:    vnot.v v9, v9
-; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v9, a4
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a5
-; LMULMAX1-RV32-NEXT:    vmul.vx v9, v9, a6
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 8
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: cttz_zero_undef_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    li a2, 1
-; LMULMAX1-RV64-NEXT:    vsub.vx v10, v8, a2
-; LMULMAX1-RV64-NEXT:    vnot.v v8, v8
-; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT:    lui a3, 5
-; LMULMAX1-RV64-NEXT:    addiw a3, a3, 1365
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    lui a4, 3
-; LMULMAX1-RV64-NEXT:    addiw a4, a4, 819
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v8, a4
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    lui a5, 1
-; LMULMAX1-RV64-NEXT:    addiw a5, a5, -241
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT:    li a6, 257
-; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a6
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT:    vsub.vx v10, v9, a2
-; LMULMAX1-RV64-NEXT:    vnot.v v9, v9
-; LMULMAX1-RV64-NEXT:    vand.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v9, a4
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT:    vmul.vx v9, v9, a6
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v9, 8
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; LMULMAX2-LABEL: cttz_zero_undef_v16i16:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; LMULMAX2-NEXT:    vle16.v v8, (a0)
+; LMULMAX2-NEXT:    li a1, 1
+; LMULMAX2-NEXT:    vsub.vx v10, v8, a1
+; LMULMAX2-NEXT:    vnot.v v8, v8
+; LMULMAX2-NEXT:    vand.vv v8, v8, v10
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX2-NEXT:    lui a1, 5
+; LMULMAX2-NEXT:    addi a1, a1, 1365
+; LMULMAX2-NEXT:    vand.vx v10, v10, a1
+; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
+; LMULMAX2-NEXT:    lui a1, 3
+; LMULMAX2-NEXT:    addi a1, a1, 819
+; LMULMAX2-NEXT:    vand.vx v10, v8, a1
+; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX2-NEXT:    vand.vx v8, v8, a1
+; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX2-NEXT:    lui a1, 1
+; LMULMAX2-NEXT:    addi a1, a1, -241
+; LMULMAX2-NEXT:    vand.vx v8, v8, a1
+; LMULMAX2-NEXT:    li a1, 257
+; LMULMAX2-NEXT:    vmul.vx v8, v8, a1
+; LMULMAX2-NEXT:    vsrl.vi v8, v8, 8
+; LMULMAX2-NEXT:    vse16.v v8, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: cttz_zero_undef_v16i16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vle16.v v8, (a1)
+; LMULMAX1-NEXT:    vle16.v v9, (a0)
+; LMULMAX1-NEXT:    li a2, 1
+; LMULMAX1-NEXT:    vsub.vx v10, v8, a2
+; LMULMAX1-NEXT:    vnot.v v8, v8
+; LMULMAX1-NEXT:    vand.vv v8, v8, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX1-NEXT:    lui a3, 5
+; LMULMAX1-NEXT:    addi a3, a3, 1365
+; LMULMAX1-NEXT:    vand.vx v10, v10, a3
+; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
+; LMULMAX1-NEXT:    lui a4, 3
+; LMULMAX1-NEXT:    addi a4, a4, 819
+; LMULMAX1-NEXT:    vand.vx v10, v8, a4
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT:    vand.vx v8, v8, a4
+; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX1-NEXT:    lui a5, 1
+; LMULMAX1-NEXT:    addi a5, a5, -241
+; LMULMAX1-NEXT:    vand.vx v8, v8, a5
+; LMULMAX1-NEXT:    li a6, 257
+; LMULMAX1-NEXT:    vmul.vx v8, v8, a6
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
+; LMULMAX1-NEXT:    vsub.vx v10, v9, a2
+; LMULMAX1-NEXT:    vnot.v v9, v9
+; LMULMAX1-NEXT:    vand.vv v9, v9, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
+; LMULMAX1-NEXT:    vand.vx v10, v10, a3
+; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
+; LMULMAX1-NEXT:    vand.vx v10, v9, a4
+; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
+; LMULMAX1-NEXT:    vand.vx v9, v9, a4
+; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
+; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
+; LMULMAX1-NEXT:    vand.vx v9, v9, a5
+; LMULMAX1-NEXT:    vmul.vx v9, v9, a6
+; LMULMAX1-NEXT:    vsrl.vi v9, v9, 8
+; LMULMAX1-NEXT:    vse16.v v9, (a0)
+; LMULMAX1-NEXT:    vse16.v v8, (a1)
+; LMULMAX1-NEXT:    ret
 ;
 ; LMULMAX8-LABEL: cttz_zero_undef_v16i16:
 ; LMULMAX8:       # %bb.0:
@@ -2212,11 +1996,11 @@ define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vand.vv v8, v8, v10
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
 ; LMULMAX2-RV64I-NEXT:    vand.vx v10, v10, a1
 ; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v10
 ; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
 ; LMULMAX2-RV64I-NEXT:    vand.vx v10, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
@@ -2224,10 +2008,10 @@ define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 4
 ; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v10
 ; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
 ; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 257
+; LMULMAX2-RV64I-NEXT:    addi a1, a1, 257
 ; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
 ; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; LMULMAX2-RV64I-NEXT:    vse32.v v8, (a0)
@@ -2501,3 +2285,8 @@ define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
   store <4 x i64> %c, ptr %x
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; LMULMAX1-RV32: {{.*}}
+; LMULMAX1-RV64: {{.*}}
+; LMULMAX2-RV32: {{.*}}
+; LMULMAX2-RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
index 5605437443d76bb..95c1beb284c4003 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -997,7 +997,7 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
 ; RV64NOM-NEXT:    vand.vv v8, v8, v11
 ; RV64NOM-NEXT:    vadd.vv v8, v9, v8
 ; RV64NOM-NEXT:    lui a0, 12320
-; RV64NOM-NEXT:    addiw a0, a0, 257
+; RV64NOM-NEXT:    addi a0, a0, 257
 ; RV64NOM-NEXT:    vmv.s.x v9, a0
 ; RV64NOM-NEXT:    vsext.vf4 v10, v9
 ; RV64NOM-NEXT:    vsra.vv v8, v8, v10
@@ -1053,7 +1053,7 @@ define i32 @extractelt_udiv_v4i32(<4 x i32> %x) {
 ; RV64NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64NOM-NEXT:    vsrl.vi v8, v8, 0
 ; RV64NOM-NEXT:    lui a0, 322639
-; RV64NOM-NEXT:    addiw a0, a0, -945
+; RV64NOM-NEXT:    addi a0, a0, -945
 ; RV64NOM-NEXT:    vmulhu.vx v8, v8, a0
 ; RV64NOM-NEXT:    vslidedown.vi v8, v8, 2
 ; RV64NOM-NEXT:    vmv.x.s a0, v8
@@ -1064,7 +1064,7 @@ define i32 @extractelt_udiv_v4i32(<4 x i32> %x) {
 ; RV64M-LABEL: extractelt_udiv_v4i32:
 ; RV64M:       # %bb.0:
 ; RV64M-NEXT:    lui a0, 322639
-; RV64M-NEXT:    addiw a0, a0, -945
+; RV64M-NEXT:    addi a0, a0, -945
 ; RV64M-NEXT:    slli a0, a0, 32
 ; RV64M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV64M-NEXT:    vslidedown.vi v8, v8, 2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
index c1a4aa4b05d4bec..ea818df7329c7d1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -236,79 +236,42 @@ define <64 x half> @interleave_v32f16(<32 x half> %x, <32 x half> %y) {
 }
 
 define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
-; RV32-V128-LABEL: interleave_v32f32:
-; RV32-V128:       # %bb.0:
-; RV32-V128-NEXT:    addi sp, sp, -16
-; RV32-V128-NEXT:    .cfi_def_cfa_offset 16
-; RV32-V128-NEXT:    csrr a0, vlenb
-; RV32-V128-NEXT:    slli a0, a0, 2
-; RV32-V128-NEXT:    sub sp, sp, a0
-; RV32-V128-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; RV32-V128-NEXT:    lui a0, %hi(.LCPI10_0)
-; RV32-V128-NEXT:    addi a0, a0, %lo(.LCPI10_0)
-; RV32-V128-NEXT:    li a1, 32
-; RV32-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; RV32-V128-NEXT:    vle16.v v4, (a0)
-; RV32-V128-NEXT:    lui a0, %hi(.LCPI10_1)
-; RV32-V128-NEXT:    addi a0, a0, %lo(.LCPI10_1)
-; RV32-V128-NEXT:    vle16.v v24, (a0)
-; RV32-V128-NEXT:    addi a0, sp, 16
-; RV32-V128-NEXT:    vs4r.v v24, (a0) # Unknown-size Folded Spill
-; RV32-V128-NEXT:    lui a0, 699051
-; RV32-V128-NEXT:    addi a0, a0, -1366
-; RV32-V128-NEXT:    vmv.s.x v0, a0
-; RV32-V128-NEXT:    vrgatherei16.vv v24, v8, v4
-; RV32-V128-NEXT:    addi a0, sp, 16
-; RV32-V128-NEXT:    vl4r.v v12, (a0) # Unknown-size Folded Reload
-; RV32-V128-NEXT:    vrgatherei16.vv v24, v16, v12, v0.t
-; RV32-V128-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-V128-NEXT:    vwaddu.vv v0, v8, v16
-; RV32-V128-NEXT:    li a0, -1
-; RV32-V128-NEXT:    vwmaccu.vx v0, a0, v16
-; RV32-V128-NEXT:    vmv8r.v v8, v0
-; RV32-V128-NEXT:    vmv8r.v v16, v24
-; RV32-V128-NEXT:    csrr a0, vlenb
-; RV32-V128-NEXT:    slli a0, a0, 2
-; RV32-V128-NEXT:    add sp, sp, a0
-; RV32-V128-NEXT:    addi sp, sp, 16
-; RV32-V128-NEXT:    ret
-;
-; RV64-V128-LABEL: interleave_v32f32:
-; RV64-V128:       # %bb.0:
-; RV64-V128-NEXT:    addi sp, sp, -16
-; RV64-V128-NEXT:    .cfi_def_cfa_offset 16
-; RV64-V128-NEXT:    csrr a0, vlenb
-; RV64-V128-NEXT:    slli a0, a0, 2
-; RV64-V128-NEXT:    sub sp, sp, a0
-; RV64-V128-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; RV64-V128-NEXT:    lui a0, %hi(.LCPI10_0)
-; RV64-V128-NEXT:    addi a0, a0, %lo(.LCPI10_0)
-; RV64-V128-NEXT:    li a1, 32
-; RV64-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; RV64-V128-NEXT:    vle16.v v4, (a0)
-; RV64-V128-NEXT:    lui a0, %hi(.LCPI10_1)
-; RV64-V128-NEXT:    addi a0, a0, %lo(.LCPI10_1)
-; RV64-V128-NEXT:    vle16.v v24, (a0)
-; RV64-V128-NEXT:    addi a0, sp, 16
-; RV64-V128-NEXT:    vs4r.v v24, (a0) # Unknown-size Folded Spill
-; RV64-V128-NEXT:    lui a0, 699051
-; RV64-V128-NEXT:    addiw a0, a0, -1366
-; RV64-V128-NEXT:    vmv.s.x v0, a0
-; RV64-V128-NEXT:    vrgatherei16.vv v24, v8, v4
-; RV64-V128-NEXT:    addi a0, sp, 16
-; RV64-V128-NEXT:    vl4r.v v12, (a0) # Unknown-size Folded Reload
-; RV64-V128-NEXT:    vrgatherei16.vv v24, v16, v12, v0.t
-; RV64-V128-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV64-V128-NEXT:    vwaddu.vv v0, v8, v16
-; RV64-V128-NEXT:    li a0, -1
-; RV64-V128-NEXT:    vwmaccu.vx v0, a0, v16
-; RV64-V128-NEXT:    vmv8r.v v8, v0
-; RV64-V128-NEXT:    vmv8r.v v16, v24
-; RV64-V128-NEXT:    csrr a0, vlenb
-; RV64-V128-NEXT:    slli a0, a0, 2
-; RV64-V128-NEXT:    add sp, sp, a0
-; RV64-V128-NEXT:    addi sp, sp, 16
-; RV64-V128-NEXT:    ret
+; V128-LABEL: interleave_v32f32:
+; V128:       # %bb.0:
+; V128-NEXT:    addi sp, sp, -16
+; V128-NEXT:    .cfi_def_cfa_offset 16
+; V128-NEXT:    csrr a0, vlenb
+; V128-NEXT:    slli a0, a0, 2
+; V128-NEXT:    sub sp, sp, a0
+; V128-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; V128-NEXT:    lui a0, %hi(.LCPI10_0)
+; V128-NEXT:    addi a0, a0, %lo(.LCPI10_0)
+; V128-NEXT:    li a1, 32
+; V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; V128-NEXT:    vle16.v v4, (a0)
+; V128-NEXT:    lui a0, %hi(.LCPI10_1)
+; V128-NEXT:    addi a0, a0, %lo(.LCPI10_1)
+; V128-NEXT:    vle16.v v24, (a0)
+; V128-NEXT:    addi a0, sp, 16
+; V128-NEXT:    vs4r.v v24, (a0) # Unknown-size Folded Spill
+; V128-NEXT:    lui a0, 699051
+; V128-NEXT:    addi a0, a0, -1366
+; V128-NEXT:    vmv.s.x v0, a0
+; V128-NEXT:    vrgatherei16.vv v24, v8, v4
+; V128-NEXT:    addi a0, sp, 16
+; V128-NEXT:    vl4r.v v12, (a0) # Unknown-size Folded Reload
+; V128-NEXT:    vrgatherei16.vv v24, v16, v12, v0.t
+; V128-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; V128-NEXT:    vwaddu.vv v0, v8, v16
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v0, a0, v16
+; V128-NEXT:    vmv8r.v v8, v0
+; V128-NEXT:    vmv8r.v v16, v24
+; V128-NEXT:    csrr a0, vlenb
+; V128-NEXT:    slli a0, a0, 2
+; V128-NEXT:    add sp, sp, a0
+; V128-NEXT:    addi sp, sp, 16
+; V128-NEXT:    ret
 ;
 ; V512-LABEL: interleave_v32f32:
 ; V512:       # %bb.0:
@@ -375,31 +338,18 @@ define <4 x float> @unary_interleave_v4f32(<4 x float> %x) {
 
 ; FIXME: Is there better codegen we can do here?
 define <4 x double> @unary_interleave_v4f64(<4 x double> %x) {
-; RV32-V128-LABEL: unary_interleave_v4f64:
-; RV32-V128:       # %bb.0:
-; RV32-V128-NEXT:    lui a0, 12304
-; RV32-V128-NEXT:    addi a0, a0, 512
-; RV32-V128-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-V128-NEXT:    vmv.s.x v10, a0
-; RV32-V128-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-V128-NEXT:    vsext.vf2 v12, v10
-; RV32-V128-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32-V128-NEXT:    vrgatherei16.vv v10, v8, v12
-; RV32-V128-NEXT:    vmv.v.v v8, v10
-; RV32-V128-NEXT:    ret
-;
-; RV64-V128-LABEL: unary_interleave_v4f64:
-; RV64-V128:       # %bb.0:
-; RV64-V128-NEXT:    lui a0, 12304
-; RV64-V128-NEXT:    addiw a0, a0, 512
-; RV64-V128-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-V128-NEXT:    vmv.s.x v10, a0
-; RV64-V128-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-V128-NEXT:    vsext.vf2 v12, v10
-; RV64-V128-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64-V128-NEXT:    vrgatherei16.vv v10, v8, v12
-; RV64-V128-NEXT:    vmv.v.v v8, v10
-; RV64-V128-NEXT:    ret
+; V128-LABEL: unary_interleave_v4f64:
+; V128:       # %bb.0:
+; V128-NEXT:    lui a0, 12304
+; V128-NEXT:    addi a0, a0, 512
+; V128-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V128-NEXT:    vmv.s.x v10, a0
+; V128-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; V128-NEXT:    vsext.vf2 v12, v10
+; V128-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; V128-NEXT:    vrgatherei16.vv v10, v8, v12
+; V128-NEXT:    vmv.v.v v8, v10
+; V128-NEXT:    ret
 ;
 ; RV32-V512-LABEL: unary_interleave_v4f64:
 ; RV32-V512:       # %bb.0:
@@ -417,7 +367,7 @@ define <4 x double> @unary_interleave_v4f64(<4 x double> %x) {
 ; RV64-V512-LABEL: unary_interleave_v4f64:
 ; RV64-V512:       # %bb.0:
 ; RV64-V512-NEXT:    lui a0, 12304
-; RV64-V512-NEXT:    addiw a0, a0, 512
+; RV64-V512-NEXT:    addi a0, a0, 512
 ; RV64-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
 ; RV64-V512-NEXT:    vmv.s.x v9, a0
 ; RV64-V512-NEXT:    vsext.vf8 v10, v9
@@ -477,3 +427,6 @@ define <8 x float> @unary_interleave_v8f32(<8 x float> %x) {
   %a = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 4, i32 0, i32 undef, i32 1, i32 6, i32 undef, i32 7, i32 3>
   ret <8 x float> %a
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32-V128: {{.*}}
+; RV64-V128: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
index a7852ea5843d8a0..59eb4b89a2f56e8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -55,61 +55,35 @@ define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) {
 }
 
 define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) {
-; RV32-LABEL: vrgather_permute_shuffle_vu_v4f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 4096
-; RV32-NEXT:    addi a0, a0, 513
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v10, a0
-; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vsext.vf2 v12, v10
-; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT:    vrgatherei16.vv v10, v8, v12
-; RV32-NEXT:    vmv.v.v v8, v10
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vrgather_permute_shuffle_vu_v4f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 4096
-; RV64-NEXT:    addiw a0, a0, 513
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vmv.s.x v10, a0
-; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vsext.vf2 v12, v10
-; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT:    vrgatherei16.vv v10, v8, v12
-; RV64-NEXT:    vmv.v.v v8, v10
-; RV64-NEXT:    ret
+; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 4096
+; CHECK-NEXT:    addi a0, a0, 513
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v10, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v10
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
   %s = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
   ret <4 x double> %s
 }
 
 define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) {
-; RV32-LABEL: vrgather_permute_shuffle_uv_v4f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 4096
-; RV32-NEXT:    addi a0, a0, 513
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v10, a0
-; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vsext.vf2 v12, v10
-; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT:    vrgatherei16.vv v10, v8, v12
-; RV32-NEXT:    vmv.v.v v8, v10
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vrgather_permute_shuffle_uv_v4f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 4096
-; RV64-NEXT:    addiw a0, a0, 513
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vmv.s.x v10, a0
-; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vsext.vf2 v12, v10
-; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT:    vrgatherei16.vv v10, v8, v12
-; RV64-NEXT:    vmv.v.v v8, v10
-; RV64-NEXT:    ret
+; CHECK-LABEL: vrgather_permute_shuffle_uv_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 4096
+; CHECK-NEXT:    addi a0, a0, 513
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v10, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v10
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
   %s = shufflevector <4 x double> poison, <4 x double> %x, <4 x i32> <i32 5, i32 6, i32 4, i32 5>
   ret <4 x double> %s
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index b648420aa2e03cb..e9412019a0dec8e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -324,40 +324,24 @@ define <4 x i64> @buildvec_vid_step2_add0_v4i64() {
 }
 
 define <4 x i8> @buildvec_no_vid_v4i8_0() {
-; RV32-LABEL: buildvec_no_vid_v4i8_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 28768
-; RV32-NEXT:    addi a0, a0, 769
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v8, a0
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: buildvec_no_vid_v4i8_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 28768
-; RV64-NEXT:    addiw a0, a0, 769
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vmv.s.x v8, a0
-; RV64-NEXT:    ret
+; CHECK-LABEL: buildvec_no_vid_v4i8_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 28768
+; CHECK-NEXT:    addi a0, a0, 769
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    ret
   ret <4 x i8> <i8 1, i8 3, i8 6, i8 7>
 }
 
 define <4 x i8> @buildvec_no_vid_v4i8_1() {
-; RV32-LABEL: buildvec_no_vid_v4i8_1:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 28752
-; RV32-NEXT:    addi a0, a0, 512
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v8, a0
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: buildvec_no_vid_v4i8_1:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 28752
-; RV64-NEXT:    addiw a0, a0, 512
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vmv.s.x v8, a0
-; RV64-NEXT:    ret
+; CHECK-LABEL: buildvec_no_vid_v4i8_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 28752
+; CHECK-NEXT:    addi a0, a0, 512
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    ret
   ret <4 x i8> <i8 undef, i8 2, i8 5, i8 7>
 }
 
@@ -372,21 +356,13 @@ define <4 x i8> @buildvec_no_vid_v4i8_2() {
 }
 
 define <4 x i8> @buildvec_no_vid_v4i8_3() {
-; RV32-LABEL: buildvec_no_vid_v4i8_3:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 28672
-; RV32-NEXT:    addi a0, a0, 255
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v8, a0
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: buildvec_no_vid_v4i8_3:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 28672
-; RV64-NEXT:    addiw a0, a0, 255
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vmv.s.x v8, a0
-; RV64-NEXT:    ret
+; CHECK-LABEL: buildvec_no_vid_v4i8_3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 28672
+; CHECK-NEXT:    addi a0, a0, 255
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    ret
   ret <4 x i8> <i8 -1, i8 undef, i8 undef, i8 7>
 }
 
@@ -400,21 +376,13 @@ define <4 x i8> @buildvec_no_vid_v4i8_4() {
 }
 
 define <4 x i8> @buildvec_no_vid_v4i8_5() {
-; RV32-LABEL: buildvec_no_vid_v4i8_5:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 1032144
-; RV32-NEXT:    addi a0, a0, -257
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v8, a0
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: buildvec_no_vid_v4i8_5:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 1032144
-; RV64-NEXT:    addiw a0, a0, -257
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vmv.s.x v8, a0
-; RV64-NEXT:    ret
+; CHECK-LABEL: buildvec_no_vid_v4i8_5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 1032144
+; CHECK-NEXT:    addi a0, a0, -257
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    ret
   ret <4 x i8> <i8 -1, i8 -2, i8 -4, i8 -5>
 }
 
@@ -529,25 +497,15 @@ define void @buildvec_seq_v8i8_v4i16(ptr %x) {
 }
 
 define void @buildvec_seq_v8i8_v2i32(ptr %x) {
-; RV32-LABEL: buildvec_seq_v8i8_v2i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a1, 48
-; RV32-NEXT:    addi a1, a1, 513
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT:    vmv.v.x v8, a1
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vse8.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: buildvec_seq_v8i8_v2i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a1, 48
-; RV64-NEXT:    addiw a1, a1, 513
-; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT:    vmv.v.x v8, a1
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vse8.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: buildvec_seq_v8i8_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 48
+; CHECK-NEXT:    addi a1, a1, 513
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v8, a1
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   store <8 x i8> <i8 1, i8 2, i8 3, i8 undef, i8 1, i8 2, i8 3, i8 undef>, ptr %x
   ret void
 }
@@ -823,21 +781,13 @@ define <4 x i8> @buildvec_not_vid_v4i8_1() {
 }
 
 define <4 x i8> @buildvec_not_vid_v4i8_2() {
-; RV32-LABEL: buildvec_not_vid_v4i8_2:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, 771
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v8, a0
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: buildvec_not_vid_v4i8_2:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, 771
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vmv.s.x v8, a0
-; RV64-NEXT:    ret
+; CHECK-LABEL: buildvec_not_vid_v4i8_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, 771
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    ret
   ret <4 x i8> <i8 3, i8 3, i8 1, i8 0>
 }
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
index 83e64651c5c6313..a54fa2e9b765fef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -395,79 +395,42 @@ define <64 x i16> @interleave_v32i16(<32 x i16> %x, <32 x i16> %y) {
 }
 
 define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
-; RV32-V128-LABEL: interleave_v32i32:
-; RV32-V128:       # %bb.0:
-; RV32-V128-NEXT:    addi sp, sp, -16
-; RV32-V128-NEXT:    .cfi_def_cfa_offset 16
-; RV32-V128-NEXT:    csrr a0, vlenb
-; RV32-V128-NEXT:    slli a0, a0, 2
-; RV32-V128-NEXT:    sub sp, sp, a0
-; RV32-V128-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; RV32-V128-NEXT:    lui a0, %hi(.LCPI17_0)
-; RV32-V128-NEXT:    addi a0, a0, %lo(.LCPI17_0)
-; RV32-V128-NEXT:    li a1, 32
-; RV32-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; RV32-V128-NEXT:    vle16.v v4, (a0)
-; RV32-V128-NEXT:    lui a0, %hi(.LCPI17_1)
-; RV32-V128-NEXT:    addi a0, a0, %lo(.LCPI17_1)
-; RV32-V128-NEXT:    vle16.v v24, (a0)
-; RV32-V128-NEXT:    addi a0, sp, 16
-; RV32-V128-NEXT:    vs4r.v v24, (a0) # Unknown-size Folded Spill
-; RV32-V128-NEXT:    lui a0, 699051
-; RV32-V128-NEXT:    addi a0, a0, -1366
-; RV32-V128-NEXT:    vmv.s.x v0, a0
-; RV32-V128-NEXT:    vrgatherei16.vv v24, v8, v4
-; RV32-V128-NEXT:    addi a0, sp, 16
-; RV32-V128-NEXT:    vl4r.v v12, (a0) # Unknown-size Folded Reload
-; RV32-V128-NEXT:    vrgatherei16.vv v24, v16, v12, v0.t
-; RV32-V128-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-V128-NEXT:    vwaddu.vv v0, v8, v16
-; RV32-V128-NEXT:    li a0, -1
-; RV32-V128-NEXT:    vwmaccu.vx v0, a0, v16
-; RV32-V128-NEXT:    vmv8r.v v8, v0
-; RV32-V128-NEXT:    vmv8r.v v16, v24
-; RV32-V128-NEXT:    csrr a0, vlenb
-; RV32-V128-NEXT:    slli a0, a0, 2
-; RV32-V128-NEXT:    add sp, sp, a0
-; RV32-V128-NEXT:    addi sp, sp, 16
-; RV32-V128-NEXT:    ret
-;
-; RV64-V128-LABEL: interleave_v32i32:
-; RV64-V128:       # %bb.0:
-; RV64-V128-NEXT:    addi sp, sp, -16
-; RV64-V128-NEXT:    .cfi_def_cfa_offset 16
-; RV64-V128-NEXT:    csrr a0, vlenb
-; RV64-V128-NEXT:    slli a0, a0, 2
-; RV64-V128-NEXT:    sub sp, sp, a0
-; RV64-V128-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; RV64-V128-NEXT:    lui a0, %hi(.LCPI17_0)
-; RV64-V128-NEXT:    addi a0, a0, %lo(.LCPI17_0)
-; RV64-V128-NEXT:    li a1, 32
-; RV64-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; RV64-V128-NEXT:    vle16.v v4, (a0)
-; RV64-V128-NEXT:    lui a0, %hi(.LCPI17_1)
-; RV64-V128-NEXT:    addi a0, a0, %lo(.LCPI17_1)
-; RV64-V128-NEXT:    vle16.v v24, (a0)
-; RV64-V128-NEXT:    addi a0, sp, 16
-; RV64-V128-NEXT:    vs4r.v v24, (a0) # Unknown-size Folded Spill
-; RV64-V128-NEXT:    lui a0, 699051
-; RV64-V128-NEXT:    addiw a0, a0, -1366
-; RV64-V128-NEXT:    vmv.s.x v0, a0
-; RV64-V128-NEXT:    vrgatherei16.vv v24, v8, v4
-; RV64-V128-NEXT:    addi a0, sp, 16
-; RV64-V128-NEXT:    vl4r.v v12, (a0) # Unknown-size Folded Reload
-; RV64-V128-NEXT:    vrgatherei16.vv v24, v16, v12, v0.t
-; RV64-V128-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV64-V128-NEXT:    vwaddu.vv v0, v8, v16
-; RV64-V128-NEXT:    li a0, -1
-; RV64-V128-NEXT:    vwmaccu.vx v0, a0, v16
-; RV64-V128-NEXT:    vmv8r.v v8, v0
-; RV64-V128-NEXT:    vmv8r.v v16, v24
-; RV64-V128-NEXT:    csrr a0, vlenb
-; RV64-V128-NEXT:    slli a0, a0, 2
-; RV64-V128-NEXT:    add sp, sp, a0
-; RV64-V128-NEXT:    addi sp, sp, 16
-; RV64-V128-NEXT:    ret
+; V128-LABEL: interleave_v32i32:
+; V128:       # %bb.0:
+; V128-NEXT:    addi sp, sp, -16
+; V128-NEXT:    .cfi_def_cfa_offset 16
+; V128-NEXT:    csrr a0, vlenb
+; V128-NEXT:    slli a0, a0, 2
+; V128-NEXT:    sub sp, sp, a0
+; V128-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; V128-NEXT:    lui a0, %hi(.LCPI17_0)
+; V128-NEXT:    addi a0, a0, %lo(.LCPI17_0)
+; V128-NEXT:    li a1, 32
+; V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; V128-NEXT:    vle16.v v4, (a0)
+; V128-NEXT:    lui a0, %hi(.LCPI17_1)
+; V128-NEXT:    addi a0, a0, %lo(.LCPI17_1)
+; V128-NEXT:    vle16.v v24, (a0)
+; V128-NEXT:    addi a0, sp, 16
+; V128-NEXT:    vs4r.v v24, (a0) # Unknown-size Folded Spill
+; V128-NEXT:    lui a0, 699051
+; V128-NEXT:    addi a0, a0, -1366
+; V128-NEXT:    vmv.s.x v0, a0
+; V128-NEXT:    vrgatherei16.vv v24, v8, v4
+; V128-NEXT:    addi a0, sp, 16
+; V128-NEXT:    vl4r.v v12, (a0) # Unknown-size Folded Reload
+; V128-NEXT:    vrgatherei16.vv v24, v16, v12, v0.t
+; V128-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; V128-NEXT:    vwaddu.vv v0, v8, v16
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v0, a0, v16
+; V128-NEXT:    vmv8r.v v8, v0
+; V128-NEXT:    vmv8r.v v16, v24
+; V128-NEXT:    csrr a0, vlenb
+; V128-NEXT:    slli a0, a0, 2
+; V128-NEXT:    add sp, sp, a0
+; V128-NEXT:    addi sp, sp, 16
+; V128-NEXT:    ret
 ;
 ; V512-LABEL: interleave_v32i32:
 ; V512:       # %bb.0:
@@ -509,49 +472,27 @@ define <4 x i8> @unary_interleave_v4i8(<4 x i8> %x) {
 
 ; This shouldn't be interleaved
 define <4 x i8> @unary_interleave_v4i8_invalid(<4 x i8> %x) {
-; RV32-V128-LABEL: unary_interleave_v4i8_invalid:
-; RV32-V128:       # %bb.0:
-; RV32-V128-NEXT:    lui a0, 16
-; RV32-V128-NEXT:    addi a0, a0, 768
-; RV32-V128-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-V128-NEXT:    vmv.s.x v10, a0
-; RV32-V128-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; RV32-V128-NEXT:    vrgather.vv v9, v8, v10
-; RV32-V128-NEXT:    vmv1r.v v8, v9
-; RV32-V128-NEXT:    ret
-;
-; RV64-V128-LABEL: unary_interleave_v4i8_invalid:
-; RV64-V128:       # %bb.0:
-; RV64-V128-NEXT:    lui a0, 16
-; RV64-V128-NEXT:    addiw a0, a0, 768
-; RV64-V128-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-V128-NEXT:    vmv.s.x v10, a0
-; RV64-V128-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; RV64-V128-NEXT:    vrgather.vv v9, v8, v10
-; RV64-V128-NEXT:    vmv1r.v v8, v9
-; RV64-V128-NEXT:    ret
-;
-; RV32-V512-LABEL: unary_interleave_v4i8_invalid:
-; RV32-V512:       # %bb.0:
-; RV32-V512-NEXT:    lui a0, 16
-; RV32-V512-NEXT:    addi a0, a0, 768
-; RV32-V512-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
-; RV32-V512-NEXT:    vmv.s.x v10, a0
-; RV32-V512-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-V512-NEXT:    vrgather.vv v9, v8, v10
-; RV32-V512-NEXT:    vmv1r.v v8, v9
-; RV32-V512-NEXT:    ret
+; V128-LABEL: unary_interleave_v4i8_invalid:
+; V128:       # %bb.0:
+; V128-NEXT:    lui a0, 16
+; V128-NEXT:    addi a0, a0, 768
+; V128-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V128-NEXT:    vmv.s.x v10, a0
+; V128-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; V128-NEXT:    vrgather.vv v9, v8, v10
+; V128-NEXT:    vmv1r.v v8, v9
+; V128-NEXT:    ret
 ;
-; RV64-V512-LABEL: unary_interleave_v4i8_invalid:
-; RV64-V512:       # %bb.0:
-; RV64-V512-NEXT:    lui a0, 16
-; RV64-V512-NEXT:    addiw a0, a0, 768
-; RV64-V512-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
-; RV64-V512-NEXT:    vmv.s.x v10, a0
-; RV64-V512-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-V512-NEXT:    vrgather.vv v9, v8, v10
-; RV64-V512-NEXT:    vmv1r.v v8, v9
-; RV64-V512-NEXT:    ret
+; V512-LABEL: unary_interleave_v4i8_invalid:
+; V512:       # %bb.0:
+; V512-NEXT:    lui a0, 16
+; V512-NEXT:    addi a0, a0, 768
+; V512-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; V512-NEXT:    vmv.s.x v10, a0
+; V512-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; V512-NEXT:    vrgather.vv v9, v8, v10
+; V512-NEXT:    vmv1r.v v8, v9
+; V512-NEXT:    ret
   %a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 4>
   ret <4 x i8> %a
 }
@@ -608,31 +549,18 @@ define <4 x i32> @unary_interleave_v4i32(<4 x i32> %x) {
 
 ; FIXME: Is there better codegen we can do here?
 define <4 x i64> @unary_interleave_v4i64(<4 x i64> %x) {
-; RV32-V128-LABEL: unary_interleave_v4i64:
-; RV32-V128:       # %bb.0:
-; RV32-V128-NEXT:    lui a0, 12304
-; RV32-V128-NEXT:    addi a0, a0, 512
-; RV32-V128-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-V128-NEXT:    vmv.s.x v10, a0
-; RV32-V128-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-V128-NEXT:    vsext.vf2 v12, v10
-; RV32-V128-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32-V128-NEXT:    vrgatherei16.vv v10, v8, v12
-; RV32-V128-NEXT:    vmv.v.v v8, v10
-; RV32-V128-NEXT:    ret
-;
-; RV64-V128-LABEL: unary_interleave_v4i64:
-; RV64-V128:       # %bb.0:
-; RV64-V128-NEXT:    lui a0, 12304
-; RV64-V128-NEXT:    addiw a0, a0, 512
-; RV64-V128-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-V128-NEXT:    vmv.s.x v10, a0
-; RV64-V128-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-V128-NEXT:    vsext.vf2 v12, v10
-; RV64-V128-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64-V128-NEXT:    vrgatherei16.vv v10, v8, v12
-; RV64-V128-NEXT:    vmv.v.v v8, v10
-; RV64-V128-NEXT:    ret
+; V128-LABEL: unary_interleave_v4i64:
+; V128:       # %bb.0:
+; V128-NEXT:    lui a0, 12304
+; V128-NEXT:    addi a0, a0, 512
+; V128-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V128-NEXT:    vmv.s.x v10, a0
+; V128-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; V128-NEXT:    vsext.vf2 v12, v10
+; V128-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; V128-NEXT:    vrgatherei16.vv v10, v8, v12
+; V128-NEXT:    vmv.v.v v8, v10
+; V128-NEXT:    ret
 ;
 ; RV32-V512-LABEL: unary_interleave_v4i64:
 ; RV32-V512:       # %bb.0:
@@ -650,7 +578,7 @@ define <4 x i64> @unary_interleave_v4i64(<4 x i64> %x) {
 ; RV64-V512-LABEL: unary_interleave_v4i64:
 ; RV64-V512:       # %bb.0:
 ; RV64-V512-NEXT:    lui a0, 12304
-; RV64-V512-NEXT:    addiw a0, a0, 512
+; RV64-V512-NEXT:    addi a0, a0, 512
 ; RV64-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
 ; RV64-V512-NEXT:    vmv.s.x v9, a0
 ; RV64-V512-NEXT:    vsext.vf8 v10, v9
@@ -750,3 +678,6 @@ define <4 x i8> @unary_interleave_10uu_v4i8(<4 x i8> %x) {
   ret <4 x i8> %a
 }
 
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32-V128: {{.*}}
+; RV64-V128: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 927fd3e203355c0..a56a81f5f793bc2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -51,57 +51,33 @@ define <4 x i16> @shuffle_vx_v4i16(<4 x i16> %x) {
 }
 
 define <4 x i16> @vrgather_permute_shuffle_vu_v4i16(<4 x i16> %x) {
-; RV32-LABEL: vrgather_permute_shuffle_vu_v4i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 4096
-; RV32-NEXT:    addi a0, a0, 513
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v9, a0
-; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vsext.vf2 v10, v9
-; RV32-NEXT:    vrgather.vv v9, v8, v10
-; RV32-NEXT:    vmv1r.v v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vrgather_permute_shuffle_vu_v4i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 4096
-; RV64-NEXT:    addiw a0, a0, 513
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vmv.s.x v9, a0
-; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vsext.vf2 v10, v9
-; RV64-NEXT:    vrgather.vv v9, v8, v10
-; RV64-NEXT:    vmv1r.v v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: vrgather_permute_shuffle_vu_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 4096
+; CHECK-NEXT:    addi a0, a0, 513
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v9
+; CHECK-NEXT:    vrgather.vv v9, v8, v10
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
   %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
   ret <4 x i16> %s
 }
 
 define <4 x i16> @vrgather_permute_shuffle_uv_v4i16(<4 x i16> %x) {
-; RV32-LABEL: vrgather_permute_shuffle_uv_v4i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 4096
-; RV32-NEXT:    addi a0, a0, 513
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v9, a0
-; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vsext.vf2 v10, v9
-; RV32-NEXT:    vrgather.vv v9, v8, v10
-; RV32-NEXT:    vmv1r.v v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vrgather_permute_shuffle_uv_v4i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 4096
-; RV64-NEXT:    addiw a0, a0, 513
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vmv.s.x v9, a0
-; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vsext.vf2 v10, v9
-; RV64-NEXT:    vrgather.vv v9, v8, v10
-; RV64-NEXT:    vmv1r.v v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: vrgather_permute_shuffle_uv_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 4096
+; CHECK-NEXT:    addi a0, a0, 513
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v9
+; CHECK-NEXT:    vrgather.vv v9, v8, v10
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
   %s = shufflevector <4 x i16> poison, <4 x i16> %x, <4 x i32> <i32 5, i32 6, i32 4, i32 5>
   ret <4 x i16> %s
 }
@@ -439,33 +415,19 @@ define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) {
 }
 
 define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) {
-; RV32-LABEL: splat_ve2_we0_ins_i2ve4:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 8256
-; RV32-NEXT:    addi a0, a0, 514
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT:    vmv.v.x v11, a0
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; RV32-NEXT:    li a0, 66
-; RV32-NEXT:    vmv.s.x v0, a0
-; RV32-NEXT:    vrgather.vv v10, v8, v11
-; RV32-NEXT:    vrgather.vi v10, v9, 0, v0.t
-; RV32-NEXT:    vmv1r.v v8, v10
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: splat_ve2_we0_ins_i2ve4:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 8256
-; RV64-NEXT:    addiw a0, a0, 514
-; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT:    vmv.v.x v11, a0
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; RV64-NEXT:    li a0, 66
-; RV64-NEXT:    vmv.s.x v0, a0
-; RV64-NEXT:    vrgather.vv v10, v8, v11
-; RV64-NEXT:    vrgather.vi v10, v9, 0, v0.t
-; RV64-NEXT:    vmv1r.v v8, v10
-; RV64-NEXT:    ret
+; CHECK-LABEL: splat_ve2_we0_ins_i2ve4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 8256
+; CHECK-NEXT:    addi a0, a0, 514
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v11, a0
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    li a0, 66
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vrgather.vv v10, v8, v11
+; CHECK-NEXT:    vrgather.vi v10, v9, 0, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
   %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 2, i32 2, i32 2, i32 8, i32 2>
   ret <8 x i8> %shuff
 }
@@ -490,43 +452,24 @@ define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) {
 }
 
 define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) {
-; RV32-LABEL: splat_ve2_we0_ins_i2ve4_i5we6:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vmv.v.i v10, 6
-; RV32-NEXT:    vmv.v.i v11, 0
-; RV32-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
-; RV32-NEXT:    vslideup.vi v11, v10, 5
-; RV32-NEXT:    lui a0, 8256
-; RV32-NEXT:    addi a0, a0, 2
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a0
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; RV32-NEXT:    li a0, 98
-; RV32-NEXT:    vmv.s.x v0, a0
-; RV32-NEXT:    vrgather.vv v10, v8, v12
-; RV32-NEXT:    vrgather.vv v10, v9, v11, v0.t
-; RV32-NEXT:    vmv1r.v v8, v10
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: splat_ve2_we0_ins_i2ve4_i5we6:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vmv.v.i v10, 6
-; RV64-NEXT:    vmv.v.i v11, 0
-; RV64-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
-; RV64-NEXT:    vslideup.vi v11, v10, 5
-; RV64-NEXT:    lui a0, 8256
-; RV64-NEXT:    addiw a0, a0, 2
-; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT:    vmv.v.x v12, a0
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; RV64-NEXT:    li a0, 98
-; RV64-NEXT:    vmv.s.x v0, a0
-; RV64-NEXT:    vrgather.vv v10, v8, v12
-; RV64-NEXT:    vrgather.vv v10, v9, v11, v0.t
-; RV64-NEXT:    vmv1r.v v8, v10
-; RV64-NEXT:    ret
+; CHECK-LABEL: splat_ve2_we0_ins_i2ve4_i5we6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.i v10, 6
+; CHECK-NEXT:    vmv.v.i v11, 0
+; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
+; CHECK-NEXT:    vslideup.vi v11, v10, 5
+; CHECK-NEXT:    lui a0, 8256
+; CHECK-NEXT:    addi a0, a0, 2
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    li a0, 98
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vrgather.vv v10, v8, v12
+; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
   %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 2, i32 2, i32 14, i32 8, i32 2>
   ret <8 x i8> %shuff
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index b2a9813e50a1868..e6868abdb5b1d71 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1099,99 +1099,52 @@ define void @urem_v2i64(ptr %x, ptr %y) {
 }
 
 define void @mulhu_v16i8(ptr %x) {
-; RV32-LABEL: mulhu_v16i8:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT:    vle8.v v8, (a0)
-; RV32-NEXT:    lui a1, 3
-; RV32-NEXT:    addi a1, a1, -2044
-; RV32-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT:    vmv.v.i v9, 0
-; RV32-NEXT:    li a1, -128
-; RV32-NEXT:    vmerge.vxm v10, v9, a1, v0
-; RV32-NEXT:    lui a1, 1
-; RV32-NEXT:    addi a2, a1, 32
-; RV32-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT:    vmv.s.x v0, a2
-; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT:    lui a2, %hi(.LCPI65_0)
-; RV32-NEXT:    addi a2, a2, %lo(.LCPI65_0)
-; RV32-NEXT:    vle8.v v11, (a2)
-; RV32-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV32-NEXT:    vsrl.vv v9, v8, v9
-; RV32-NEXT:    vmulhu.vv v9, v9, v11
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    vmulhu.vv v8, v8, v10
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    li a2, 513
-; RV32-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT:    vmv.s.x v0, a2
-; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT:    vmv.v.i v9, 4
-; RV32-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV32-NEXT:    addi a1, a1, 78
-; RV32-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT:    vmerge.vim v9, v9, 3, v0
-; RV32-NEXT:    lui a1, 8
-; RV32-NEXT:    addi a1, a1, 304
-; RV32-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT:    vmerge.vim v9, v9, 2, v0
-; RV32-NEXT:    vsrl.vv v8, v8, v9
-; RV32-NEXT:    vse8.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: mulhu_v16i8:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT:    vle8.v v8, (a0)
-; RV64-NEXT:    lui a1, 3
-; RV64-NEXT:    addiw a1, a1, -2044
-; RV64-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT:    vmv.s.x v0, a1
-; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT:    vmv.v.i v9, 0
-; RV64-NEXT:    li a1, -128
-; RV64-NEXT:    vmerge.vxm v10, v9, a1, v0
-; RV64-NEXT:    lui a1, 1
-; RV64-NEXT:    addiw a2, a1, 32
-; RV64-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT:    vmv.s.x v0, a2
-; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT:    lui a2, %hi(.LCPI65_0)
-; RV64-NEXT:    addi a2, a2, %lo(.LCPI65_0)
-; RV64-NEXT:    vle8.v v11, (a2)
-; RV64-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV64-NEXT:    vsrl.vv v9, v8, v9
-; RV64-NEXT:    vmulhu.vv v9, v9, v11
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    vmulhu.vv v8, v8, v10
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    li a2, 513
-; RV64-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT:    vmv.s.x v0, a2
-; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT:    vmv.v.i v9, 4
-; RV64-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV64-NEXT:    addiw a1, a1, 78
-; RV64-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT:    vmv.s.x v0, a1
-; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT:    vmerge.vim v9, v9, 3, v0
-; RV64-NEXT:    lui a1, 8
-; RV64-NEXT:    addiw a1, a1, 304
-; RV64-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT:    vmv.s.x v0, a1
-; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT:    vmerge.vim v9, v9, 2, v0
-; RV64-NEXT:    vsrl.vv v8, v8, v9
-; RV64-NEXT:    vse8.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: mulhu_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    lui a1, 3
+; CHECK-NEXT:    addi a1, a1, -2044
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v9, 0
+; CHECK-NEXT:    li a1, -128
+; CHECK-NEXT:    vmerge.vxm v10, v9, a1, v0
+; CHECK-NEXT:    lui a1, 1
+; CHECK-NEXT:    addi a2, a1, 32
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a2
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    lui a2, %hi(.LCPI65_0)
+; CHECK-NEXT:    addi a2, a2, %lo(.LCPI65_0)
+; CHECK-NEXT:    vle8.v v11, (a2)
+; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT:    vsrl.vv v9, v8, v9
+; CHECK-NEXT:    vmulhu.vv v9, v9, v11
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    vmulhu.vv v8, v8, v10
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    li a2, 513
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a2
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v9, 4
+; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT:    addi a1, a1, 78
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vmerge.vim v9, v9, 3, v0
+; CHECK-NEXT:    lui a1, 8
+; CHECK-NEXT:    addi a1, a1, 304
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vmerge.vim v9, v9, 2, v0
+; CHECK-NEXT:    vsrl.vv v8, v8, v9
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i8>, ptr %x
   %b = udiv <16 x i8> %a, <i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25>
   store <16 x i8> %b, ptr %x
@@ -1238,53 +1191,29 @@ define void @mulhu_v8i16(ptr %x) {
 }
 
 define void @mulhu_v6i16(ptr %x) {
-; RV32-LABEL: mulhu_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT:    vid.v v9
-; RV32-NEXT:    vadd.vi v9, v9, 12
-; RV32-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v10, v8, 4
-; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT:    vdivu.vv v9, v10, v9
-; RV32-NEXT:    lui a1, 45217
-; RV32-NEXT:    addi a1, a1, -1785
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v10, a1
-; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vsext.vf2 v11, v10
-; RV32-NEXT:    vdivu.vv v8, v8, v11
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vslideup.vi v8, v9, 4
-; RV32-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: mulhu_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT:    vid.v v9
-; RV64-NEXT:    vadd.vi v9, v9, 12
-; RV64-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v10, v8, 4
-; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT:    vdivu.vv v9, v10, v9
-; RV64-NEXT:    lui a1, 45217
-; RV64-NEXT:    addiw a1, a1, -1785
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vmv.s.x v10, a1
-; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vsext.vf2 v11, v10
-; RV64-NEXT:    vdivu.vv v8, v8, v11
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vslideup.vi v8, v9, 4
-; RV64-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV64-NEXT:    vse16.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: mulhu_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vadd.vi v9, v9, 12
+; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v10, v8, 4
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vdivu.vv v9, v10, v9
+; CHECK-NEXT:    lui a1, 45217
+; CHECK-NEXT:    addi a1, a1, -1785
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v10, a1
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v11, v10
+; CHECK-NEXT:    vdivu.vv v8, v8, v11
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 4
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = udiv <6 x i16> %a, <i16 7, i16 9, i16 10, i16 11, i16 12, i16 13>
   store <6 x i16> %b, ptr %x
@@ -1292,55 +1221,30 @@ define void @mulhu_v6i16(ptr %x) {
 }
 
 define void @mulhu_v4i32(ptr %x) {
-; RV32-LABEL: mulhu_v4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    lui a1, 524288
-; RV32-NEXT:    vmv.s.x v9, a1
-; RV32-NEXT:    vmv.v.i v10, 0
-; RV32-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
-; RV32-NEXT:    vslideup.vi v10, v9, 2
-; RV32-NEXT:    lui a1, %hi(.LCPI68_0)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI68_0)
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vle32.v v9, (a1)
-; RV32-NEXT:    vmulhu.vv v9, v8, v9
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    vmulhu.vv v8, v8, v10
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a1, 4128
-; RV32-NEXT:    addi a1, a1, 514
-; RV32-NEXT:    vmv.s.x v9, a1
-; RV32-NEXT:    vsext.vf4 v10, v9
-; RV32-NEXT:    vsrl.vv v8, v8, v10
-; RV32-NEXT:    vse32.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: mulhu_v4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    lui a1, 524288
-; RV64-NEXT:    vmv.s.x v9, a1
-; RV64-NEXT:    vmv.v.i v10, 0
-; RV64-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
-; RV64-NEXT:    vslideup.vi v10, v9, 2
-; RV64-NEXT:    lui a1, %hi(.LCPI68_0)
-; RV64-NEXT:    addi a1, a1, %lo(.LCPI68_0)
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vle32.v v9, (a1)
-; RV64-NEXT:    vmulhu.vv v9, v8, v9
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    vmulhu.vv v8, v8, v10
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a1, 4128
-; RV64-NEXT:    addiw a1, a1, 514
-; RV64-NEXT:    vmv.s.x v9, a1
-; RV64-NEXT:    vsext.vf4 v10, v9
-; RV64-NEXT:    vsrl.vv v8, v8, v10
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: mulhu_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    lui a1, 524288
+; CHECK-NEXT:    vmv.s.x v9, a1
+; CHECK-NEXT:    vmv.v.i v10, 0
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT:    vslideup.vi v10, v9, 2
+; CHECK-NEXT:    lui a1, %hi(.LCPI68_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI68_0)
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vle32.v v9, (a1)
+; CHECK-NEXT:    vmulhu.vv v9, v8, v9
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    vmulhu.vv v8, v8, v10
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    lui a1, 4128
+; CHECK-NEXT:    addi a1, a1, 514
+; CHECK-NEXT:    vmv.s.x v9, a1
+; CHECK-NEXT:    vsext.vf4 v10, v9
+; CHECK-NEXT:    vsrl.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = udiv <4 x i32> %a, <i32 5, i32 6, i32 7, i32 9>
   store <4 x i32> %b, ptr %x
@@ -1397,45 +1301,25 @@ define void @mulhu_v2i64(ptr %x) {
 }
 
 define void @mulhs_v16i8(ptr %x) {
-; RV32-LABEL: mulhs_v16i8:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT:    vle8.v v8, (a0)
-; RV32-NEXT:    li a1, -123
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, -1452
-; RV32-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    li a1, 57
-; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT:    vmerge.vxm v9, v9, a1, v0
-; RV32-NEXT:    vmulhu.vv v8, v8, v9
-; RV32-NEXT:    vmv.v.i v9, 7
-; RV32-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV32-NEXT:    vsrl.vv v8, v8, v9
-; RV32-NEXT:    vse8.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: mulhs_v16i8:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT:    vle8.v v8, (a0)
-; RV64-NEXT:    li a1, -123
-; RV64-NEXT:    vmv.v.x v9, a1
-; RV64-NEXT:    lui a1, 5
-; RV64-NEXT:    addiw a1, a1, -1452
-; RV64-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT:    vmv.s.x v0, a1
-; RV64-NEXT:    li a1, 57
-; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT:    vmerge.vxm v9, v9, a1, v0
-; RV64-NEXT:    vmulhu.vv v8, v8, v9
-; RV64-NEXT:    vmv.v.i v9, 7
-; RV64-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV64-NEXT:    vsrl.vv v8, v8, v9
-; RV64-NEXT:    vse8.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: mulhs_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    li a1, -123
+; CHECK-NEXT:    vmv.v.x v9, a1
+; CHECK-NEXT:    lui a1, 5
+; CHECK-NEXT:    addi a1, a1, -1452
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    li a1, 57
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vmerge.vxm v9, v9, a1, v0
+; CHECK-NEXT:    vmulhu.vv v8, v8, v9
+; CHECK-NEXT:    vmv.v.i v9, 7
+; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT:    vsrl.vv v8, v8, v9
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i8>, ptr %x
   %b = udiv <16 x i8> %a, <i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9>
   store <16 x i8> %b, ptr %x
@@ -1443,43 +1327,24 @@ define void @mulhs_v16i8(ptr %x) {
 }
 
 define void @mulhs_v8i16(ptr %x) {
-; RV32-LABEL: mulhs_v8i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, -1755
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    li a1, 105
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    lui a1, 1048571
-; RV32-NEXT:    addi a1, a1, 1755
-; RV32-NEXT:    vmerge.vxm v9, v9, a1, v0
-; RV32-NEXT:    vmulh.vv v8, v8, v9
-; RV32-NEXT:    vsra.vi v8, v8, 1
-; RV32-NEXT:    vsrl.vi v9, v8, 15
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: mulhs_v8i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    lui a1, 5
-; RV64-NEXT:    addiw a1, a1, -1755
-; RV64-NEXT:    vmv.v.x v9, a1
-; RV64-NEXT:    li a1, 105
-; RV64-NEXT:    vmv.s.x v0, a1
-; RV64-NEXT:    lui a1, 1048571
-; RV64-NEXT:    addiw a1, a1, 1755
-; RV64-NEXT:    vmerge.vxm v9, v9, a1, v0
-; RV64-NEXT:    vmulh.vv v8, v8, v9
-; RV64-NEXT:    vsra.vi v8, v8, 1
-; RV64-NEXT:    vsrl.vi v9, v8, 15
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    vse16.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: mulhs_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    lui a1, 5
+; CHECK-NEXT:    addi a1, a1, -1755
+; CHECK-NEXT:    vmv.v.x v9, a1
+; CHECK-NEXT:    li a1, 105
+; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    lui a1, 1048571
+; CHECK-NEXT:    addi a1, a1, 1755
+; CHECK-NEXT:    vmerge.vxm v9, v9, a1, v0
+; CHECK-NEXT:    vmulh.vv v8, v8, v9
+; CHECK-NEXT:    vsra.vi v8, v8, 1
+; CHECK-NEXT:    vsrl.vi v9, v8, 15
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
   %b = sdiv <8 x i16> %a, <i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7, i16 -7, i16 7>
   store <8 x i16> %b, ptr %x
@@ -1487,57 +1352,31 @@ define void @mulhs_v8i16(ptr %x) {
 }
 
 define void @mulhs_v6i16(ptr %x) {
-; RV32-LABEL: mulhs_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT:    vmv.v.i v9, 7
-; RV32-NEXT:    vid.v v10
-; RV32-NEXT:    li a1, -14
-; RV32-NEXT:    vmadd.vx v10, a1, v9
-; RV32-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 4
-; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT:    vdiv.vv v9, v9, v10
-; RV32-NEXT:    lui a1, 1020016
-; RV32-NEXT:    addi a1, a1, 2041
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v10, a1
-; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vsext.vf2 v11, v10
-; RV32-NEXT:    vdiv.vv v8, v8, v11
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vslideup.vi v8, v9, 4
-; RV32-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: mulhs_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT:    vmv.v.i v9, 7
-; RV64-NEXT:    vid.v v10
-; RV64-NEXT:    li a1, -14
-; RV64-NEXT:    vmadd.vx v10, a1, v9
-; RV64-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v9, v8, 4
-; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT:    vdiv.vv v9, v9, v10
-; RV64-NEXT:    lui a1, 1020016
-; RV64-NEXT:    addiw a1, a1, 2041
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vmv.s.x v10, a1
-; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vsext.vf2 v11, v10
-; RV64-NEXT:    vdiv.vv v8, v8, v11
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vslideup.vi v8, v9, 4
-; RV64-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV64-NEXT:    vse16.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: mulhs_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.i v9, 7
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    li a1, -14
+; CHECK-NEXT:    vmadd.vx v10, a1, v9
+; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v9, v8, 4
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vdiv.vv v9, v9, v10
+; CHECK-NEXT:    lui a1, 1020016
+; CHECK-NEXT:    addi a1, a1, 2041
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v10, a1
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v11, v10
+; CHECK-NEXT:    vdiv.vv v8, v8, v11
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 4
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = sdiv <6 x i16> %a, <i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7>
   store <6 x i16> %b, ptr %x
@@ -5027,105 +4866,55 @@ define void @extract_v4i64(ptr %x, ptr %y) {
 }
 
 define void @mulhu_v32i8(ptr %x) {
-; LMULMAX2-RV32-LABEL: mulhu_v32i8:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    li a1, 32
-; LMULMAX2-RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    vmv.v.i v10, 0
-; LMULMAX2-RV32-NEXT:    lui a2, 163907
-; LMULMAX2-RV32-NEXT:    addi a2, a2, -2044
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a2
-; LMULMAX2-RV32-NEXT:    li a2, -128
-; LMULMAX2-RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v12, v10, a2, v0
-; LMULMAX2-RV32-NEXT:    lui a2, 66049
-; LMULMAX2-RV32-NEXT:    addi a2, a2, 32
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a2
-; LMULMAX2-RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    lui a2, %hi(.LCPI181_0)
-; LMULMAX2-RV32-NEXT:    addi a2, a2, %lo(.LCPI181_0)
-; LMULMAX2-RV32-NEXT:    vle8.v v14, (a2)
-; LMULMAX2-RV32-NEXT:    vmerge.vim v10, v10, 1, v0
-; LMULMAX2-RV32-NEXT:    vsrl.vv v10, v8, v10
-; LMULMAX2-RV32-NEXT:    vmulhu.vv v10, v10, v14
-; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vmulhu.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vmv.v.i v10, 4
-; LMULMAX2-RV32-NEXT:    lui a2, 8208
-; LMULMAX2-RV32-NEXT:    addi a2, a2, 513
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a2
-; LMULMAX2-RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmerge.vim v10, v10, 1, v0
-; LMULMAX2-RV32-NEXT:    lui a2, 66785
-; LMULMAX2-RV32-NEXT:    addi a2, a2, 78
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a2
-; LMULMAX2-RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmerge.vim v10, v10, 3, v0
-; LMULMAX2-RV32-NEXT:    lui a2, 529160
-; LMULMAX2-RV32-NEXT:    addi a2, a2, 304
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a2
-; LMULMAX2-RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmerge.vim v10, v10, 2, v0
-; LMULMAX2-RV32-NEXT:    vsrl.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: mulhu_v32i8:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    li a1, 32
-; LMULMAX2-RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    vmv.v.i v10, 0
-; LMULMAX2-RV64-NEXT:    lui a2, 163907
-; LMULMAX2-RV64-NEXT:    addiw a2, a2, -2044
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV64-NEXT:    vmv.s.x v0, a2
-; LMULMAX2-RV64-NEXT:    li a2, -128
-; LMULMAX2-RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vmerge.vxm v12, v10, a2, v0
-; LMULMAX2-RV64-NEXT:    lui a2, 66049
-; LMULMAX2-RV64-NEXT:    addiw a2, a2, 32
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV64-NEXT:    vmv.s.x v0, a2
-; LMULMAX2-RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    lui a2, %hi(.LCPI181_0)
-; LMULMAX2-RV64-NEXT:    addi a2, a2, %lo(.LCPI181_0)
-; LMULMAX2-RV64-NEXT:    vle8.v v14, (a2)
-; LMULMAX2-RV64-NEXT:    vmerge.vim v10, v10, 1, v0
-; LMULMAX2-RV64-NEXT:    vsrl.vv v10, v8, v10
-; LMULMAX2-RV64-NEXT:    vmulhu.vv v10, v10, v14
-; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vmulhu.vv v8, v8, v12
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vmv.v.i v10, 4
-; LMULMAX2-RV64-NEXT:    lui a2, 8208
-; LMULMAX2-RV64-NEXT:    addiw a2, a2, 513
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV64-NEXT:    vmv.s.x v0, a2
-; LMULMAX2-RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vmerge.vim v10, v10, 1, v0
-; LMULMAX2-RV64-NEXT:    lui a2, 66785
-; LMULMAX2-RV64-NEXT:    addiw a2, a2, 78
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV64-NEXT:    vmv.s.x v0, a2
-; LMULMAX2-RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vmerge.vim v10, v10, 3, v0
-; LMULMAX2-RV64-NEXT:    lui a2, 529160
-; LMULMAX2-RV64-NEXT:    addiw a2, a2, 304
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV64-NEXT:    vmv.s.x v0, a2
-; LMULMAX2-RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vmerge.vim v10, v10, 2, v0
-; LMULMAX2-RV64-NEXT:    vsrl.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
+; LMULMAX2-LABEL: mulhu_v32i8:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    li a1, 32
+; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT:    vle8.v v8, (a0)
+; LMULMAX2-NEXT:    vmv.v.i v10, 0
+; LMULMAX2-NEXT:    lui a2, 163907
+; LMULMAX2-NEXT:    addi a2, a2, -2044
+; LMULMAX2-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; LMULMAX2-NEXT:    vmv.s.x v0, a2
+; LMULMAX2-NEXT:    li a2, -128
+; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT:    vmerge.vxm v12, v10, a2, v0
+; LMULMAX2-NEXT:    lui a2, 66049
+; LMULMAX2-NEXT:    addi a2, a2, 32
+; LMULMAX2-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; LMULMAX2-NEXT:    vmv.s.x v0, a2
+; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT:    lui a2, %hi(.LCPI181_0)
+; LMULMAX2-NEXT:    addi a2, a2, %lo(.LCPI181_0)
+; LMULMAX2-NEXT:    vle8.v v14, (a2)
+; LMULMAX2-NEXT:    vmerge.vim v10, v10, 1, v0
+; LMULMAX2-NEXT:    vsrl.vv v10, v8, v10
+; LMULMAX2-NEXT:    vmulhu.vv v10, v10, v14
+; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
+; LMULMAX2-NEXT:    vmulhu.vv v8, v8, v12
+; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX2-NEXT:    vmv.v.i v10, 4
+; LMULMAX2-NEXT:    lui a2, 8208
+; LMULMAX2-NEXT:    addi a2, a2, 513
+; LMULMAX2-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; LMULMAX2-NEXT:    vmv.s.x v0, a2
+; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT:    vmerge.vim v10, v10, 1, v0
+; LMULMAX2-NEXT:    lui a2, 66785
+; LMULMAX2-NEXT:    addi a2, a2, 78
+; LMULMAX2-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; LMULMAX2-NEXT:    vmv.s.x v0, a2
+; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT:    vmerge.vim v10, v10, 3, v0
+; LMULMAX2-NEXT:    lui a2, 529160
+; LMULMAX2-NEXT:    addi a2, a2, 304
+; LMULMAX2-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; LMULMAX2-NEXT:    vmv.s.x v0, a2
+; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT:    vmerge.vim v10, v10, 2, v0
+; LMULMAX2-NEXT:    vsrl.vv v8, v8, v10
+; LMULMAX2-NEXT:    vse8.v v8, (a0)
+; LMULMAX2-NEXT:    ret
 ;
 ; LMULMAX1-LABEL: mulhu_v32i8:
 ; LMULMAX1:       # %bb.0:
@@ -5242,57 +5031,31 @@ define void @mulhu_v16i16(ptr %x) {
 }
 
 define void @mulhu_v8i32(ptr %x) {
-; LMULMAX2-RV32-LABEL: mulhu_v8i32:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    li a1, 68
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-RV32-NEXT:    lui a1, %hi(.LCPI183_0)
-; LMULMAX2-RV32-NEXT:    addi a1, a1, %lo(.LCPI183_0)
-; LMULMAX2-RV32-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-RV32-NEXT:    vmv.v.i v12, 0
-; LMULMAX2-RV32-NEXT:    lui a1, 524288
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v12, v12, a1, v0
-; LMULMAX2-RV32-NEXT:    vmulhu.vv v10, v8, v10
-; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vmulhu.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    lui a1, 4128
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 514
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vsext.vf4 v12, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: mulhu_v8i32:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    li a1, 68
-; LMULMAX2-RV64-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-RV64-NEXT:    lui a1, %hi(.LCPI183_0)
-; LMULMAX2-RV64-NEXT:    addi a1, a1, %lo(.LCPI183_0)
-; LMULMAX2-RV64-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-RV64-NEXT:    vmv.v.i v12, 0
-; LMULMAX2-RV64-NEXT:    lui a1, 524288
-; LMULMAX2-RV64-NEXT:    vmerge.vxm v12, v12, a1, v0
-; LMULMAX2-RV64-NEXT:    vmulhu.vv v10, v8, v10
-; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vmulhu.vv v8, v8, v12
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    lui a1, 4128
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 514
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX2-RV64-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vsext.vf4 v12, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vv v8, v8, v12
-; LMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
+; LMULMAX2-LABEL: mulhu_v8i32:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-NEXT:    vle32.v v8, (a0)
+; LMULMAX2-NEXT:    li a1, 68
+; LMULMAX2-NEXT:    vmv.s.x v0, a1
+; LMULMAX2-NEXT:    lui a1, %hi(.LCPI183_0)
+; LMULMAX2-NEXT:    addi a1, a1, %lo(.LCPI183_0)
+; LMULMAX2-NEXT:    vle32.v v10, (a1)
+; LMULMAX2-NEXT:    vmv.v.i v12, 0
+; LMULMAX2-NEXT:    lui a1, 524288
+; LMULMAX2-NEXT:    vmerge.vxm v12, v12, a1, v0
+; LMULMAX2-NEXT:    vmulhu.vv v10, v8, v10
+; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
+; LMULMAX2-NEXT:    vmulhu.vv v8, v8, v12
+; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX2-NEXT:    lui a1, 4128
+; LMULMAX2-NEXT:    addi a1, a1, 514
+; LMULMAX2-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; LMULMAX2-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-NEXT:    vsext.vf4 v12, v10
+; LMULMAX2-NEXT:    vsrl.vv v8, v8, v12
+; LMULMAX2-NEXT:    vse32.v v8, (a0)
+; LMULMAX2-NEXT:    ret
 ;
 ; LMULMAX1-RV32-LABEL: mulhu_v8i32:
 ; LMULMAX1-RV32:       # %bb.0:
@@ -5334,7 +5097,7 @@ define void @mulhu_v8i32(ptr %x) {
 ; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
 ; LMULMAX1-RV64-NEXT:    vle32.v v9, (a1)
 ; LMULMAX1-RV64-NEXT:    lui a2, 36976
-; LMULMAX1-RV64-NEXT:    addiw a2, a2, 1541
+; LMULMAX1-RV64-NEXT:    addi a2, a2, 1541
 ; LMULMAX1-RV64-NEXT:    vmv.s.x v10, a2
 ; LMULMAX1-RV64-NEXT:    vsext.vf4 v11, v10
 ; LMULMAX1-RV64-NEXT:    vdivu.vv v9, v9, v11
@@ -5398,7 +5161,7 @@ define void @mulhu_v4i64(ptr %x) {
 ; LMULMAX2-RV64-NEXT:    vmulhu.vv v8, v8, v12
 ; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
 ; LMULMAX2-RV64-NEXT:    lui a1, 12320
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 513
+; LMULMAX2-RV64-NEXT:    addi a1, a1, 513
 ; LMULMAX2-RV64-NEXT:    vmv.s.x v10, a1
 ; LMULMAX2-RV64-NEXT:    vsext.vf8 v12, v10
 ; LMULMAX2-RV64-NEXT:    vsrl.vv v8, v8, v12
@@ -5481,85 +5244,45 @@ define void @mulhu_v4i64(ptr %x) {
 }
 
 define void @mulhs_v32i8(ptr %x) {
-; LMULMAX2-RV32-LABEL: mulhs_v32i8:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    li a1, 32
-; LMULMAX2-RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    vmv.v.i v10, 7
-; LMULMAX2-RV32-NEXT:    lui a2, 304453
-; LMULMAX2-RV32-NEXT:    addi a2, a2, -1452
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a2
-; LMULMAX2-RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmerge.vim v10, v10, 1, v0
-; LMULMAX2-RV32-NEXT:    li a1, -123
-; LMULMAX2-RV32-NEXT:    vmv.v.x v12, a1
-; LMULMAX2-RV32-NEXT:    li a1, 57
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v12, v12, a1, v0
-; LMULMAX2-RV32-NEXT:    vmulhu.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT:    vsrl.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: mulhs_v32i8:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    li a1, 32
-; LMULMAX2-RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    vmv.v.i v10, 7
-; LMULMAX2-RV64-NEXT:    lui a2, 304453
-; LMULMAX2-RV64-NEXT:    addiw a2, a2, -1452
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV64-NEXT:    vmv.s.x v0, a2
-; LMULMAX2-RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vmerge.vim v10, v10, 1, v0
-; LMULMAX2-RV64-NEXT:    li a1, -123
-; LMULMAX2-RV64-NEXT:    vmv.v.x v12, a1
-; LMULMAX2-RV64-NEXT:    li a1, 57
-; LMULMAX2-RV64-NEXT:    vmerge.vxm v12, v12, a1, v0
-; LMULMAX2-RV64-NEXT:    vmulhu.vv v8, v8, v12
-; LMULMAX2-RV64-NEXT:    vsrl.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: mulhs_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    lui a2, 5
-; LMULMAX1-RV32-NEXT:    addi a2, a2, -1452
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.s.x v0, a2
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v10, -9
-; LMULMAX1-RV32-NEXT:    vmerge.vim v10, v10, 9, v0
-; LMULMAX1-RV32-NEXT:    vdivu.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vdivu.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    ret
+; LMULMAX2-LABEL: mulhs_v32i8:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    li a1, 32
+; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT:    vle8.v v8, (a0)
+; LMULMAX2-NEXT:    vmv.v.i v10, 7
+; LMULMAX2-NEXT:    lui a2, 304453
+; LMULMAX2-NEXT:    addi a2, a2, -1452
+; LMULMAX2-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; LMULMAX2-NEXT:    vmv.s.x v0, a2
+; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT:    vmerge.vim v10, v10, 1, v0
+; LMULMAX2-NEXT:    li a1, -123
+; LMULMAX2-NEXT:    vmv.v.x v12, a1
+; LMULMAX2-NEXT:    li a1, 57
+; LMULMAX2-NEXT:    vmerge.vxm v12, v12, a1, v0
+; LMULMAX2-NEXT:    vmulhu.vv v8, v8, v12
+; LMULMAX2-NEXT:    vsrl.vv v8, v8, v10
+; LMULMAX2-NEXT:    vse8.v v8, (a0)
+; LMULMAX2-NEXT:    ret
 ;
-; LMULMAX1-RV64-LABEL: mulhs_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    lui a2, 5
-; LMULMAX1-RV64-NEXT:    addiw a2, a2, -1452
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.s.x v0, a2
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.i v10, -9
-; LMULMAX1-RV64-NEXT:    vmerge.vim v10, v10, 9, v0
-; LMULMAX1-RV64-NEXT:    vdivu.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vdivu.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; LMULMAX1-LABEL: mulhs_v32i8:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; LMULMAX1-NEXT:    vle8.v v8, (a0)
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vle8.v v9, (a1)
+; LMULMAX1-NEXT:    lui a2, 5
+; LMULMAX1-NEXT:    addi a2, a2, -1452
+; LMULMAX1-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; LMULMAX1-NEXT:    vmv.s.x v0, a2
+; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; LMULMAX1-NEXT:    vmv.v.i v10, -9
+; LMULMAX1-NEXT:    vmerge.vim v10, v10, 9, v0
+; LMULMAX1-NEXT:    vdivu.vv v9, v9, v10
+; LMULMAX1-NEXT:    vdivu.vv v8, v8, v10
+; LMULMAX1-NEXT:    vse8.v v8, (a0)
+; LMULMAX1-NEXT:    vse8.v v9, (a1)
+; LMULMAX1-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = udiv <32 x i8> %a, <i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9>
   store <32 x i8> %b, ptr %x
@@ -5567,45 +5290,25 @@ define void @mulhs_v32i8(ptr %x) {
 }
 
 define void @mulhs_v16i16(ptr %x) {
-; LMULMAX2-RV32-LABEL: mulhs_v16i16:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    lui a1, 5
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -1755
-; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32-NEXT:    lui a1, 7
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -1687
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-RV32-NEXT:    lui a1, 1048571
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 1755
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v10, v10, a1, v0
-; LMULMAX2-RV32-NEXT:    vmulh.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vsra.vi v8, v8, 1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 15
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: mulhs_v16i16:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    lui a1, 5
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -1755
-; LMULMAX2-RV64-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV64-NEXT:    lui a1, 7
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -1687
-; LMULMAX2-RV64-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-RV64-NEXT:    lui a1, 1048571
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1755
-; LMULMAX2-RV64-NEXT:    vmerge.vxm v10, v10, a1, v0
-; LMULMAX2-RV64-NEXT:    vmulh.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vsra.vi v8, v8, 1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 15
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
+; LMULMAX2-LABEL: mulhs_v16i16:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; LMULMAX2-NEXT:    vle16.v v8, (a0)
+; LMULMAX2-NEXT:    lui a1, 5
+; LMULMAX2-NEXT:    addi a1, a1, -1755
+; LMULMAX2-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-NEXT:    lui a1, 7
+; LMULMAX2-NEXT:    addi a1, a1, -1687
+; LMULMAX2-NEXT:    vmv.s.x v0, a1
+; LMULMAX2-NEXT:    lui a1, 1048571
+; LMULMAX2-NEXT:    addi a1, a1, 1755
+; LMULMAX2-NEXT:    vmerge.vxm v10, v10, a1, v0
+; LMULMAX2-NEXT:    vmulh.vv v8, v8, v10
+; LMULMAX2-NEXT:    vsra.vi v8, v8, 1
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 15
+; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX2-NEXT:    vse16.v v8, (a0)
+; LMULMAX2-NEXT:    ret
 ;
 ; LMULMAX1-LABEL: mulhs_v16i16:
 ; LMULMAX1:       # %bb.0:
@@ -5764,14 +5467,14 @@ define void @mulhs_v4i64(ptr %x) {
 ; LMULMAX2-RV64-NEXT:    vmerge.vxm v10, v10, a1, v0
 ; LMULMAX2-RV64-NEXT:    vmulh.vv v10, v8, v10
 ; LMULMAX2-RV64-NEXT:    lui a1, 1044496
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -256
+; LMULMAX2-RV64-NEXT:    addi a1, a1, -256
 ; LMULMAX2-RV64-NEXT:    vmv.s.x v12, a1
 ; LMULMAX2-RV64-NEXT:    vsext.vf8 v14, v12
 ; LMULMAX2-RV64-NEXT:    vmadd.vv v14, v8, v10
 ; LMULMAX2-RV64-NEXT:    li a1, 63
 ; LMULMAX2-RV64-NEXT:    vsrl.vx v8, v14, a1
 ; LMULMAX2-RV64-NEXT:    lui a1, 4096
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 256
+; LMULMAX2-RV64-NEXT:    addi a1, a1, 256
 ; LMULMAX2-RV64-NEXT:    vmv.s.x v10, a1
 ; LMULMAX2-RV64-NEXT:    vsext.vf8 v12, v10
 ; LMULMAX2-RV64-NEXT:    vsra.vv v10, v14, v12
@@ -8356,33 +8059,19 @@ define void @mulhu_vx_v16i8(ptr %x) {
 }
 
 define void @mulhu_vx_v8i16(ptr %x) {
-; RV32-LABEL: mulhu_vx_v8i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    lui a1, 2
-; RV32-NEXT:    addi a1, a1, 1171
-; RV32-NEXT:    vmulhu.vx v9, v8, a1
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v8, v8, 1
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: mulhu_vx_v8i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    lui a1, 2
-; RV64-NEXT:    addiw a1, a1, 1171
-; RV64-NEXT:    vmulhu.vx v9, v8, a1
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v8, v8, 1
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vse16.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: mulhu_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    lui a1, 2
+; CHECK-NEXT:    addi a1, a1, 1171
+; CHECK-NEXT:    vmulhu.vx v9, v8, a1
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v8, v8, 1
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
   %b = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
   store <8 x i16> %b, ptr %x
@@ -8390,27 +8079,16 @@ define void @mulhu_vx_v8i16(ptr %x) {
 }
 
 define void @mulhu_vx_v4i32(ptr %x) {
-; RV32-LABEL: mulhu_vx_v4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    lui a1, 838861
-; RV32-NEXT:    addi a1, a1, -819
-; RV32-NEXT:    vmulhu.vx v8, v8, a1
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vse32.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: mulhu_vx_v4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    lui a1, 838861
-; RV64-NEXT:    addiw a1, a1, -819
-; RV64-NEXT:    vmulhu.vx v8, v8, a1
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: mulhu_vx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    lui a1, 838861
+; CHECK-NEXT:    addi a1, a1, -819
+; CHECK-NEXT:    vmulhu.vx v8, v8, a1
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i32>, ptr %x
   %b = udiv <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
   store <4 x i32> %b, ptr %x
@@ -8472,31 +8150,18 @@ define void @mulhs_vx_v16i8(ptr %x) {
 }
 
 define void @mulhs_vx_v8i16(ptr %x) {
-; RV32-LABEL: mulhs_vx_v8i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, -1755
-; RV32-NEXT:    vmulh.vx v8, v8, a1
-; RV32-NEXT:    vsra.vi v8, v8, 1
-; RV32-NEXT:    vsrl.vi v9, v8, 15
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: mulhs_vx_v8i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    lui a1, 5
-; RV64-NEXT:    addiw a1, a1, -1755
-; RV64-NEXT:    vmulh.vx v8, v8, a1
-; RV64-NEXT:    vsra.vi v8, v8, 1
-; RV64-NEXT:    vsrl.vi v9, v8, 15
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    vse16.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: mulhs_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    lui a1, 5
+; CHECK-NEXT:    addi a1, a1, -1755
+; CHECK-NEXT:    vmulh.vx v8, v8, a1
+; CHECK-NEXT:    vsra.vi v8, v8, 1
+; CHECK-NEXT:    vsrl.vi v9, v8, 15
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
   %b = sdiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
   store <8 x i16> %b, ptr %x
@@ -8522,7 +8187,7 @@ define void @mulhs_vx_v4i32(ptr %x) {
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vle32.v v8, (a0)
 ; RV64-NEXT:    lui a1, 629146
-; RV64-NEXT:    addiw a1, a1, -1639
+; RV64-NEXT:    addi a1, a1, -1639
 ; RV64-NEXT:    vmulh.vx v8, v8, a1
 ; RV64-NEXT:    vsra.vi v8, v8, 1
 ; RV64-NEXT:    vsrl.vi v9, v8, 31
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index b3099f6b57056ed..eeb8e517d01d2d8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -845,7 +845,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    vrgatherei16.vv v8, v24, v16, v0.t
 ; RV64-NEXT:    lui a1, 16
-; RV64-NEXT:    addiw a1, a1, 7
+; RV64-NEXT:    addi a1, a1, 7
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vmv.v.x v12, a1
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
@@ -917,7 +917,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vsetivli zero, 5, e64, m4, tu, ma
 ; RV64-NEXT:    vmv.v.v v4, v8
 ; RV64-NEXT:    lui a1, 112
-; RV64-NEXT:    addiw a1, a1, 1
+; RV64-NEXT:    addi a1, a1, 1
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vmv.v.x v12, a1
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
index eae615db137eaac..d23c494ba37a061 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -426,21 +426,21 @@ define <10 x i1> @buildvec_mask_v10i1() {
 }
 
 define <16 x i1> @buildvec_mask_v16i1() {
-; CHECK-RV32-LABEL: buildvec_mask_v16i1:
-; CHECK-RV32:       # %bb.0:
-; CHECK-RV32-NEXT:    lui a0, 11
-; CHECK-RV32-NEXT:    addi a0, a0, 1718
-; CHECK-RV32-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-RV32-NEXT:    vmv.s.x v0, a0
-; CHECK-RV32-NEXT:    ret
-;
-; CHECK-RV64-LABEL: buildvec_mask_v16i1:
-; CHECK-RV64:       # %bb.0:
-; CHECK-RV64-NEXT:    lui a0, 11
-; CHECK-RV64-NEXT:    addiw a0, a0, 1718
-; CHECK-RV64-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-RV64-NEXT:    vmv.s.x v0, a0
-; CHECK-RV64-NEXT:    ret
+; CHECK-LABEL: buildvec_mask_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 11
+; CHECK-NEXT:    addi a0, a0, 1718
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    ret
+;
+; ZVE32F-LABEL: buildvec_mask_v16i1:
+; ZVE32F:       # %bb.0:
+; ZVE32F-NEXT:    lui a0, 11
+; ZVE32F-NEXT:    addi a0, a0, 1718
+; ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; ZVE32F-NEXT:    vmv.s.x v0, a0
+; ZVE32F-NEXT:    ret
   ret <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1>
 }
 
@@ -478,7 +478,7 @@ define <32 x i1> @buildvec_mask_v32i1() {
 ; RV64-LMULMAX1-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
 ; RV64-LMULMAX1-NEXT:    vmv.s.x v0, a0
 ; RV64-LMULMAX1-NEXT:    lui a0, 11
-; RV64-LMULMAX1-NEXT:    addiw a0, a0, 1718
+; RV64-LMULMAX1-NEXT:    addi a0, a0, 1718
 ; RV64-LMULMAX1-NEXT:    vmv.s.x v8, a0
 ; RV64-LMULMAX1-NEXT:    ret
 ;
@@ -493,7 +493,7 @@ define <32 x i1> @buildvec_mask_v32i1() {
 ; RV64-LMULMAX2-LABEL: buildvec_mask_v32i1:
 ; RV64-LMULMAX2:       # %bb.0:
 ; RV64-LMULMAX2-NEXT:    lui a0, 748384
-; RV64-LMULMAX2-NEXT:    addiw a0, a0, 1776
+; RV64-LMULMAX2-NEXT:    addi a0, a0, 1776
 ; RV64-LMULMAX2-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; RV64-LMULMAX2-NEXT:    vmv.s.x v0, a0
 ; RV64-LMULMAX2-NEXT:    ret
@@ -509,7 +509,7 @@ define <32 x i1> @buildvec_mask_v32i1() {
 ; RV64-LMULMAX4-LABEL: buildvec_mask_v32i1:
 ; RV64-LMULMAX4:       # %bb.0:
 ; RV64-LMULMAX4-NEXT:    lui a0, 748384
-; RV64-LMULMAX4-NEXT:    addiw a0, a0, 1776
+; RV64-LMULMAX4-NEXT:    addi a0, a0, 1776
 ; RV64-LMULMAX4-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; RV64-LMULMAX4-NEXT:    vmv.s.x v0, a0
 ; RV64-LMULMAX4-NEXT:    ret
@@ -525,10 +525,18 @@ define <32 x i1> @buildvec_mask_v32i1() {
 ; RV64-LMULMAX8-LABEL: buildvec_mask_v32i1:
 ; RV64-LMULMAX8:       # %bb.0:
 ; RV64-LMULMAX8-NEXT:    lui a0, 748384
-; RV64-LMULMAX8-NEXT:    addiw a0, a0, 1776
+; RV64-LMULMAX8-NEXT:    addi a0, a0, 1776
 ; RV64-LMULMAX8-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; RV64-LMULMAX8-NEXT:    vmv.s.x v0, a0
 ; RV64-LMULMAX8-NEXT:    ret
+;
+; ZVE32F-LABEL: buildvec_mask_v32i1:
+; ZVE32F:       # %bb.0:
+; ZVE32F-NEXT:    lui a0, 748384
+; ZVE32F-NEXT:    addi a0, a0, 1776
+; ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; ZVE32F-NEXT:    vmv.s.x v0, a0
+; ZVE32F-NEXT:    ret
   ret <32 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1>
 }
 
@@ -553,10 +561,10 @@ define <64 x i1> @buildvec_mask_v64i1() {
 ; RV64-LMULMAX1-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
 ; RV64-LMULMAX1-NEXT:    vmv.s.x v0, a0
 ; RV64-LMULMAX1-NEXT:    lui a0, 4
-; RV64-LMULMAX1-NEXT:    addiw a0, a0, -1793
+; RV64-LMULMAX1-NEXT:    addi a0, a0, -1793
 ; RV64-LMULMAX1-NEXT:    vmv.s.x v9, a0
 ; RV64-LMULMAX1-NEXT:    lui a0, 11
-; RV64-LMULMAX1-NEXT:    addiw a0, a0, 1718
+; RV64-LMULMAX1-NEXT:    addi a0, a0, 1718
 ; RV64-LMULMAX1-NEXT:    vmv.s.x v8, a0
 ; RV64-LMULMAX1-NEXT:    vmv1r.v v10, v8
 ; RV64-LMULMAX1-NEXT:    ret
@@ -575,11 +583,11 @@ define <64 x i1> @buildvec_mask_v64i1() {
 ; RV64-LMULMAX2-LABEL: buildvec_mask_v64i1:
 ; RV64-LMULMAX2:       # %bb.0:
 ; RV64-LMULMAX2-NEXT:    lui a0, 748384
-; RV64-LMULMAX2-NEXT:    addiw a0, a0, 1776
+; RV64-LMULMAX2-NEXT:    addi a0, a0, 1776
 ; RV64-LMULMAX2-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; RV64-LMULMAX2-NEXT:    vmv.s.x v0, a0
 ; RV64-LMULMAX2-NEXT:    lui a0, 748388
-; RV64-LMULMAX2-NEXT:    addiw a0, a0, -1793
+; RV64-LMULMAX2-NEXT:    addi a0, a0, -1793
 ; RV64-LMULMAX2-NEXT:    vmv.s.x v8, a0
 ; RV64-LMULMAX2-NEXT:    ret
 ;
@@ -622,6 +630,18 @@ define <64 x i1> @buildvec_mask_v64i1() {
 ; RV64-LMULMAX8-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-LMULMAX8-NEXT:    vle64.v v0, (a0)
 ; RV64-LMULMAX8-NEXT:    ret
+;
+; ZVE32F-LABEL: buildvec_mask_v64i1:
+; ZVE32F:       # %bb.0:
+; ZVE32F-NEXT:    lui a0, 748388
+; ZVE32F-NEXT:    addi a0, a0, -1793
+; ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
+; ZVE32F-NEXT:    vmv.v.x v0, a0
+; ZVE32F-NEXT:    lui a0, 748384
+; ZVE32F-NEXT:    addi a0, a0, 1776
+; ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
+; ZVE32F-NEXT:    vmv.s.x v0, a0
+; ZVE32F-NEXT:    ret
   ret <64 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1>
 }
 
@@ -654,16 +674,16 @@ define <128 x i1> @buildvec_mask_v128i1() {
 ; RV64-LMULMAX1-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
 ; RV64-LMULMAX1-NEXT:    vmv.s.x v0, a0
 ; RV64-LMULMAX1-NEXT:    lui a0, 11
-; RV64-LMULMAX1-NEXT:    addiw a0, a0, 1718
+; RV64-LMULMAX1-NEXT:    addi a0, a0, 1718
 ; RV64-LMULMAX1-NEXT:    vmv.s.x v8, a0
 ; RV64-LMULMAX1-NEXT:    lui a0, 8
-; RV64-LMULMAX1-NEXT:    addiw a0, a0, 1718
+; RV64-LMULMAX1-NEXT:    addi a0, a0, 1718
 ; RV64-LMULMAX1-NEXT:    vmv.s.x v12, a0
 ; RV64-LMULMAX1-NEXT:    lui a0, 4
-; RV64-LMULMAX1-NEXT:    addiw a0, a0, -1793
+; RV64-LMULMAX1-NEXT:    addi a0, a0, -1793
 ; RV64-LMULMAX1-NEXT:    vmv.s.x v9, a0
 ; RV64-LMULMAX1-NEXT:    lui a0, 14
-; RV64-LMULMAX1-NEXT:    addiw a0, a0, 1722
+; RV64-LMULMAX1-NEXT:    addi a0, a0, 1722
 ; RV64-LMULMAX1-NEXT:    vmv.s.x v14, a0
 ; RV64-LMULMAX1-NEXT:    vmv1r.v v10, v8
 ; RV64-LMULMAX1-NEXT:    vmv1r.v v11, v0
@@ -690,17 +710,17 @@ define <128 x i1> @buildvec_mask_v128i1() {
 ; RV64-LMULMAX2-LABEL: buildvec_mask_v128i1:
 ; RV64-LMULMAX2:       # %bb.0:
 ; RV64-LMULMAX2-NEXT:    lui a0, 748384
-; RV64-LMULMAX2-NEXT:    addiw a0, a0, 1776
+; RV64-LMULMAX2-NEXT:    addi a0, a0, 1776
 ; RV64-LMULMAX2-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; RV64-LMULMAX2-NEXT:    vmv.s.x v0, a0
 ; RV64-LMULMAX2-NEXT:    lui a0, 748388
-; RV64-LMULMAX2-NEXT:    addiw a0, a0, -1793
+; RV64-LMULMAX2-NEXT:    addi a0, a0, -1793
 ; RV64-LMULMAX2-NEXT:    vmv.s.x v8, a0
 ; RV64-LMULMAX2-NEXT:    lui a0, 551776
-; RV64-LMULMAX2-NEXT:    addiw a0, a0, 1776
+; RV64-LMULMAX2-NEXT:    addi a0, a0, 1776
 ; RV64-LMULMAX2-NEXT:    vmv.s.x v9, a0
 ; RV64-LMULMAX2-NEXT:    lui a0, 945060
-; RV64-LMULMAX2-NEXT:    addiw a0, a0, -1793
+; RV64-LMULMAX2-NEXT:    addi a0, a0, -1793
 ; RV64-LMULMAX2-NEXT:    vmv.s.x v10, a0
 ; RV64-LMULMAX2-NEXT:    ret
 ;
@@ -794,16 +814,16 @@ define <128 x i1> @buildvec_mask_optsize_v128i1() optsize {
 ; RV64-LMULMAX1-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
 ; RV64-LMULMAX1-NEXT:    vmv.s.x v0, a0
 ; RV64-LMULMAX1-NEXT:    lui a0, 11
-; RV64-LMULMAX1-NEXT:    addiw a0, a0, 1718
+; RV64-LMULMAX1-NEXT:    addi a0, a0, 1718
 ; RV64-LMULMAX1-NEXT:    vmv.s.x v8, a0
 ; RV64-LMULMAX1-NEXT:    lui a0, 8
-; RV64-LMULMAX1-NEXT:    addiw a0, a0, 1718
+; RV64-LMULMAX1-NEXT:    addi a0, a0, 1718
 ; RV64-LMULMAX1-NEXT:    vmv.s.x v12, a0
 ; RV64-LMULMAX1-NEXT:    lui a0, 4
-; RV64-LMULMAX1-NEXT:    addiw a0, a0, -1793
+; RV64-LMULMAX1-NEXT:    addi a0, a0, -1793
 ; RV64-LMULMAX1-NEXT:    vmv.s.x v9, a0
 ; RV64-LMULMAX1-NEXT:    lui a0, 14
-; RV64-LMULMAX1-NEXT:    addiw a0, a0, 1722
+; RV64-LMULMAX1-NEXT:    addi a0, a0, 1722
 ; RV64-LMULMAX1-NEXT:    vmv.s.x v14, a0
 ; RV64-LMULMAX1-NEXT:    vmv1r.v v10, v8
 ; RV64-LMULMAX1-NEXT:    vmv1r.v v11, v0
@@ -830,17 +850,17 @@ define <128 x i1> @buildvec_mask_optsize_v128i1() optsize {
 ; RV64-LMULMAX2-LABEL: buildvec_mask_optsize_v128i1:
 ; RV64-LMULMAX2:       # %bb.0:
 ; RV64-LMULMAX2-NEXT:    lui a0, 748384
-; RV64-LMULMAX2-NEXT:    addiw a0, a0, 1776
+; RV64-LMULMAX2-NEXT:    addi a0, a0, 1776
 ; RV64-LMULMAX2-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; RV64-LMULMAX2-NEXT:    vmv.s.x v0, a0
 ; RV64-LMULMAX2-NEXT:    lui a0, 748388
-; RV64-LMULMAX2-NEXT:    addiw a0, a0, -1793
+; RV64-LMULMAX2-NEXT:    addi a0, a0, -1793
 ; RV64-LMULMAX2-NEXT:    vmv.s.x v8, a0
 ; RV64-LMULMAX2-NEXT:    lui a0, 551776
-; RV64-LMULMAX2-NEXT:    addiw a0, a0, 1776
+; RV64-LMULMAX2-NEXT:    addi a0, a0, 1776
 ; RV64-LMULMAX2-NEXT:    vmv.s.x v9, a0
 ; RV64-LMULMAX2-NEXT:    lui a0, 945060
-; RV64-LMULMAX2-NEXT:    addiw a0, a0, -1793
+; RV64-LMULMAX2-NEXT:    addi a0, a0, -1793
 ; RV64-LMULMAX2-NEXT:    vmv.s.x v10, a0
 ; RV64-LMULMAX2-NEXT:    ret
 ;
@@ -895,3 +915,6 @@ define <128 x i1> @buildvec_mask_optsize_v128i1() optsize {
 ; ZVE32F-NEXT:    ret
   ret <128 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1>
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-RV32: {{.*}}
+; CHECK-RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 480e5c2f8f2b8b6..f8a8ffd3a07970f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -14507,7 +14507,7 @@ define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) {
 ; RV64V-LABEL: mgather_gather_4xSEW_partial_align:
 ; RV64V:       # %bb.0:
 ; RV64V-NEXT:    lui a1, 82176
-; RV64V-NEXT:    addiw a1, a1, 1024
+; RV64V-NEXT:    addi a1, a1, 1024
 ; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64V-NEXT:    vmv.s.x v9, a1
 ; RV64V-NEXT:    vluxei8.v v8, (a0), v9
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
index 8c96392f08a5dbe..dd9a1118ab821d4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
@@ -649,39 +649,22 @@ define i32 @reduce_smin_16xi32_prefix2(ptr %p) {
 }
 
 define i32 @reduce_smin_16xi32_prefix5(ptr %p) {
-; RV32-LABEL: reduce_smin_16xi32_prefix5:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a1, 524288
-; RV32-NEXT:    addi a1, a1, -1
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    vmv.s.x v10, a1
-; RV32-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
-; RV32-NEXT:    vslideup.vi v8, v10, 5
-; RV32-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
-; RV32-NEXT:    vslideup.vi v8, v10, 6
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vslideup.vi v8, v10, 7
-; RV32-NEXT:    vredmin.vs v8, v8, v8
-; RV32-NEXT:    vmv.x.s a0, v8
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reduce_smin_16xi32_prefix5:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a1, 524288
-; RV64-NEXT:    addiw a1, a1, -1
-; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    vmv.s.x v10, a1
-; RV64-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
-; RV64-NEXT:    vslideup.vi v8, v10, 5
-; RV64-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
-; RV64-NEXT:    vslideup.vi v8, v10, 6
-; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV64-NEXT:    vslideup.vi v8, v10, 7
-; RV64-NEXT:    vredmin.vs v8, v8, v8
-; RV64-NEXT:    vmv.x.s a0, v8
-; RV64-NEXT:    ret
+; CHECK-LABEL: reduce_smin_16xi32_prefix5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 524288
+; CHECK-NEXT:    addi a1, a1, -1
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vmv.s.x v10, a1
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
+; CHECK-NEXT:    vslideup.vi v8, v10, 5
+; CHECK-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
+; CHECK-NEXT:    vslideup.vi v8, v10, 6
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v10, 7
+; CHECK-NEXT:    vredmin.vs v8, v8, v8
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
   %v = load <16 x i32>, ptr %p, align 256
   %e0 = extractelement <16 x i32> %v, i32 0
   %e1 = extractelement <16 x i32> %v, i32 1
@@ -781,3 +764,6 @@ define i32 @reduce_umin_16xi32_prefix5(ptr %p) {
   %umin3 = call i32 @llvm.umin.i32(i32 %umin2, i32 %e4)
   ret i32 %umin3
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll
index 1bf832a229b218f..a1d2b5106d5a967 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll
@@ -37,69 +37,39 @@ define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
 }
 
 define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
-; RV32-LABEL: trn1.v16i8:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT:    vid.v v11
-; RV32-NEXT:    vrgather.vv v10, v8, v11
-; RV32-NEXT:    vadd.vi v8, v11, -1
-; RV32-NEXT:    lui a0, 11
-; RV32-NEXT:    addi a0, a0, -1366
-; RV32-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT:    vmv.s.x v0, a0
-; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; RV32-NEXT:    vrgather.vv v10, v9, v8, v0.t
-; RV32-NEXT:    vmv.v.v v8, v10
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: trn1.v16i8:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT:    vid.v v11
-; RV64-NEXT:    vrgather.vv v10, v8, v11
-; RV64-NEXT:    vadd.vi v8, v11, -1
-; RV64-NEXT:    lui a0, 11
-; RV64-NEXT:    addiw a0, a0, -1366
-; RV64-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT:    vmv.s.x v0, a0
-; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; RV64-NEXT:    vrgather.vv v10, v9, v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v10
-; RV64-NEXT:    ret
+; CHECK-LABEL: trn1.v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vrgather.vv v10, v8, v11
+; CHECK-NEXT:    vadd.vi v8, v11, -1
+; CHECK-NEXT:    lui a0, 11
+; CHECK-NEXT:    addi a0, a0, -1366
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
   %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
   ret <16 x i8> %tmp0
 }
 
 define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
-; RV32-LABEL: trn2.v16i8:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT:    vid.v v11
-; RV32-NEXT:    vadd.vi v12, v11, 1
-; RV32-NEXT:    vrgather.vv v10, v8, v12
-; RV32-NEXT:    lui a0, 11
-; RV32-NEXT:    addi a0, a0, -1366
-; RV32-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT:    vmv.s.x v0, a0
-; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; RV32-NEXT:    vrgather.vv v10, v9, v11, v0.t
-; RV32-NEXT:    vmv.v.v v8, v10
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: trn2.v16i8:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT:    vid.v v11
-; RV64-NEXT:    vadd.vi v12, v11, 1
-; RV64-NEXT:    vrgather.vv v10, v8, v12
-; RV64-NEXT:    lui a0, 11
-; RV64-NEXT:    addiw a0, a0, -1366
-; RV64-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT:    vmv.s.x v0, a0
-; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; RV64-NEXT:    vrgather.vv v10, v9, v11, v0.t
-; RV64-NEXT:    vmv.v.v v8, v10
-; RV64-NEXT:    ret
+; CHECK-LABEL: trn2.v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vadd.vi v12, v11, 1
+; CHECK-NEXT:    vrgather.vv v10, v8, v12
+; CHECK-NEXT:    lui a0, 11
+; CHECK-NEXT:    addi a0, a0, -1366
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
   %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
   ret <16 x i8> %tmp0
 }
@@ -379,3 +349,6 @@ define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) {
   %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   ret <8 x half> %tmp0
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
index 49daa4413035cf4..fd117f9e8ea0740 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
@@ -352,27 +352,16 @@ define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert2(<4 x i8> %v, i8 %b) {
 }
 
 define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert3(<4 x i8> %v, i8 %b) {
-; RV32-LABEL: vslide1up_4xi8_neg_incorrect_insert3:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 8208
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v10, a0
-; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; RV32-NEXT:    vrgather.vv v9, v8, v10
-; RV32-NEXT:    vmv1r.v v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vslide1up_4xi8_neg_incorrect_insert3:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 8208
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vmv.s.x v10, a0
-; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; RV64-NEXT:    vrgather.vv v9, v8, v10
-; RV64-NEXT:    vmv1r.v v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: vslide1up_4xi8_neg_incorrect_insert3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 8208
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v10, a0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vrgather.vv v9, v8, v10
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
   %v2 = shufflevector <4 x i8> poison, <4 x i8> %v, <4 x i32> <i32 5, i32 4, i32 5, i32 6>
   ret <4 x i8> %v2
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
index fc0564827036763..32d26827f989e04 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
@@ -118,19 +118,12 @@ define void @store_constant_v2i8(ptr %p) {
 }
 
 define void @store_constant_v2i16(ptr %p) {
-; RV32-LABEL: store_constant_v2i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a1, 96
-; RV32-NEXT:    addi a1, a1, 3
-; RV32-NEXT:    sw a1, 0(a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: store_constant_v2i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a1, 96
-; RV64-NEXT:    addiw a1, a1, 3
-; RV64-NEXT:    sw a1, 0(a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: store_constant_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 96
+; CHECK-NEXT:    addi a1, a1, 3
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
   store <2 x i16> <i16 3, i16 6>, ptr %p
   ret void
 }
@@ -150,87 +143,52 @@ define void @store_constant_v2i32(ptr %p) {
 }
 
 define void @store_constant_v4i8(ptr %p) {
-; RV32-LABEL: store_constant_v4i8:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a1, 4176
-; RV32-NEXT:    addi a1, a1, 1539
-; RV32-NEXT:    sw a1, 0(a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: store_constant_v4i8:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a1, 4176
-; RV64-NEXT:    addiw a1, a1, 1539
-; RV64-NEXT:    sw a1, 0(a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: store_constant_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 4176
+; CHECK-NEXT:    addi a1, a1, 1539
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
   store <4 x i8> <i8 3, i8 6, i8 5, i8 1>, ptr %p
   ret void
 }
 
 define void @store_constant_v4i16(ptr %p) {
-; RV32-LABEL: store_constant_v4i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a1, 4176
-; RV32-NEXT:    addi a1, a1, 1539
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v8, a1
-; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vsext.vf2 v9, v8
-; RV32-NEXT:    vse16.v v9, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: store_constant_v4i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a1, 4176
-; RV64-NEXT:    addiw a1, a1, 1539
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vmv.s.x v8, a1
-; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vsext.vf2 v9, v8
-; RV64-NEXT:    vse16.v v9, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: store_constant_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 4176
+; CHECK-NEXT:    addi a1, a1, 1539
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, a1
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v9, v8
+; CHECK-NEXT:    vse16.v v9, (a0)
+; CHECK-NEXT:    ret
   store <4 x i16> <i16 3, i16 6, i16 5, i16 1>, ptr %p
   ret void
 }
 
 define void @store_constant_v4i32(ptr %p) {
-; RV32-LABEL: store_constant_v4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a1, 4176
-; RV32-NEXT:    addi a1, a1, 1539
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v8, a1
-; RV32-NEXT:    vsext.vf4 v9, v8
-; RV32-NEXT:    vse32.v v9, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: store_constant_v4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a1, 4176
-; RV64-NEXT:    addiw a1, a1, 1539
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vmv.s.x v8, a1
-; RV64-NEXT:    vsext.vf4 v9, v8
-; RV64-NEXT:    vse32.v v9, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: store_constant_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 4176
+; CHECK-NEXT:    addi a1, a1, 1539
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, a1
+; CHECK-NEXT:    vsext.vf4 v9, v8
+; CHECK-NEXT:    vse32.v v9, (a0)
+; CHECK-NEXT:    ret
   store <4 x i32> <i32 3, i32 6, i32 5, i32 1>, ptr %p
   ret void
 }
 
 define void @store_id_v4i8(ptr %p) {
-; RV32-LABEL: store_id_v4i8:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a1, 12320
-; RV32-NEXT:    addi a1, a1, 256
-; RV32-NEXT:    sw a1, 0(a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: store_id_v4i8:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a1, 12320
-; RV64-NEXT:    addiw a1, a1, 256
-; RV64-NEXT:    sw a1, 0(a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: store_id_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 12320
+; CHECK-NEXT:    addi a1, a1, 256
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
   store <4 x i8> <i8 0, i8 1, i8 2, i8 3>, ptr %p
   ret void
 }
@@ -279,3 +237,6 @@ define void @store_constant_v2i8_volatile(ptr %p) {
   store volatile <2 x i8> <i8 1, i8 1>, ptr %p
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
index 8ac3b7d02e338a8..ef970ad63ae77a8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
@@ -56,7 +56,7 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado
 ; V:       # %bb.0: # %entry
 ; V-NEXT:    li a2, 1024
 ; V-NEXT:    lui a3, 983765
-; V-NEXT:    addiw a3, a3, 873
+; V-NEXT:    addi a3, a3, 873
 ; V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; V-NEXT:    vmv.s.x v0, a3
 ; V-NEXT:    li a3, 32
@@ -80,7 +80,7 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado
 ; ZVE32F:       # %bb.0: # %entry
 ; ZVE32F-NEXT:    li a2, 1024
 ; ZVE32F-NEXT:    lui a3, 983765
-; ZVE32F-NEXT:    addiw a3, a3, 873
+; ZVE32F-NEXT:    addi a3, a3, 873
 ; ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; ZVE32F-NEXT:    vmv.s.x v0, a3
 ; ZVE32F-NEXT:    li a3, 32
@@ -331,7 +331,7 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read
 ; V-NEXT:    li a2, 1024
 ; V-NEXT:    li a3, 32
 ; V-NEXT:    lui a4, 983765
-; V-NEXT:    addiw a4, a4, 873
+; V-NEXT:    addi a4, a4, 873
 ; V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; V-NEXT:    vmv.s.x v0, a4
 ; V-NEXT:    li a4, 5
@@ -355,7 +355,7 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read
 ; ZVE32F-NEXT:    li a2, 1024
 ; ZVE32F-NEXT:    li a3, 32
 ; ZVE32F-NEXT:    lui a4, 983765
-; ZVE32F-NEXT:    addiw a4, a4, 873
+; ZVE32F-NEXT:    addi a4, a4, 873
 ; ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; ZVE32F-NEXT:    vmv.s.x v0, a4
 ; ZVE32F-NEXT:    li a4, 5
@@ -838,7 +838,7 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt
 ; CHECK-NEXT:  # %bb.4: # %bb30
 ; CHECK-NEXT:    beq a4, a5, .LBB13_7
 ; CHECK-NEXT:  .LBB13_5: # %bb32
-; CHECK-NEXT:    addiw a2, a3, -1024
+; CHECK-NEXT:    addi a2, a3, -1024
 ; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:    slli a4, a3, 2
 ; CHECK-NEXT:    add a1, a1, a3
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll b/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll
index db2361dd3e58693..ca833451233becb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll
@@ -16,7 +16,7 @@ define i32 @test(i32 %call.i) {
 ; CHECK-V-NEXT:    vslide1down.vx v8, v8, a0
 ; CHECK-V-NEXT:    lui a0, 524288
 ; CHECK-V-NEXT:    vslide1down.vx v8, v8, a0
-; CHECK-V-NEXT:    addiw a0, a0, 2
+; CHECK-V-NEXT:    addi a0, a0, 2
 ; CHECK-V-NEXT:    vmslt.vx v0, v8, a0
 ; CHECK-V-NEXT:    vmv.v.i v8, 0
 ; CHECK-V-NEXT:    vmerge.vim v8, v8, 1, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index 8e47dd72ae21814..6cfa504b501bacb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -911,7 +911,7 @@ define <2 x i16> @stest_f64i16(<2 x double> %x) {
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-V-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; CHECK-V-NEXT:    lui a0, 8
-; CHECK-V-NEXT:    addiw a0, a0, -1
+; CHECK-V-NEXT:    addi a0, a0, -1
 ; CHECK-V-NEXT:    vmin.vx v8, v9, a0
 ; CHECK-V-NEXT:    lui a0, 1048568
 ; CHECK-V-NEXT:    vmax.vx v8, v8, a0
@@ -952,7 +952,7 @@ define <2 x i16> @utest_f64i16(<2 x double> %x) {
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-V-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; CHECK-V-NEXT:    lui a0, 16
-; CHECK-V-NEXT:    addiw a0, a0, -1
+; CHECK-V-NEXT:    addi a0, a0, -1
 ; CHECK-V-NEXT:    vminu.vx v8, v9, a0
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-V-NEXT:    vnsrl.wi v8, v8, 0
@@ -993,7 +993,7 @@ define <2 x i16> @ustest_f64i16(<2 x double> %x) {
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-V-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; CHECK-V-NEXT:    lui a0, 16
-; CHECK-V-NEXT:    addiw a0, a0, -1
+; CHECK-V-NEXT:    addi a0, a0, -1
 ; CHECK-V-NEXT:    vmin.vx v8, v9, a0
 ; CHECK-V-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
@@ -1073,7 +1073,7 @@ define <4 x i16> @stest_f32i16(<4 x float> %x) {
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; CHECK-V-NEXT:    lui a0, 8
-; CHECK-V-NEXT:    addiw a0, a0, -1
+; CHECK-V-NEXT:    addi a0, a0, -1
 ; CHECK-V-NEXT:    vmin.vx v8, v8, a0
 ; CHECK-V-NEXT:    lui a0, 1048568
 ; CHECK-V-NEXT:    vmax.vx v8, v8, a0
@@ -1132,7 +1132,7 @@ define <4 x i16> @utest_f32i16(<4 x float> %x) {
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; CHECK-V-NEXT:    lui a0, 16
-; CHECK-V-NEXT:    addiw a0, a0, -1
+; CHECK-V-NEXT:    addi a0, a0, -1
 ; CHECK-V-NEXT:    vminu.vx v8, v8, a0
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-V-NEXT:    vnsrl.wi v8, v8, 0
@@ -1199,7 +1199,7 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) {
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; CHECK-V-NEXT:    lui a0, 16
-; CHECK-V-NEXT:    addiw a0, a0, -1
+; CHECK-V-NEXT:    addi a0, a0, -1
 ; CHECK-V-NEXT:    vmin.vx v8, v8, a0
 ; CHECK-V-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
@@ -1511,7 +1511,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vslideup.vi v10, v8, 7
 ; CHECK-V-NEXT:    lui a0, 8
-; CHECK-V-NEXT:    addiw a0, a0, -1
+; CHECK-V-NEXT:    addi a0, a0, -1
 ; CHECK-V-NEXT:    vmin.vx v8, v10, a0
 ; CHECK-V-NEXT:    lui a0, 1048568
 ; CHECK-V-NEXT:    vmax.vx v10, v8, a0
@@ -1794,7 +1794,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vslideup.vi v10, v8, 7
 ; CHECK-V-NEXT:    lui a0, 16
-; CHECK-V-NEXT:    addiw a0, a0, -1
+; CHECK-V-NEXT:    addi a0, a0, -1
 ; CHECK-V-NEXT:    vminu.vx v10, v10, a0
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-V-NEXT:    vnsrl.wi v8, v10, 0
@@ -2097,7 +2097,7 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vslideup.vi v10, v8, 7
 ; CHECK-V-NEXT:    lui a0, 16
-; CHECK-V-NEXT:    addiw a0, a0, -1
+; CHECK-V-NEXT:    addi a0, a0, -1
 ; CHECK-V-NEXT:    vmin.vx v8, v10, a0
 ; CHECK-V-NEXT:    vmax.vx v10, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
@@ -4227,7 +4227,7 @@ define <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-V-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; CHECK-V-NEXT:    lui a0, 8
-; CHECK-V-NEXT:    addiw a0, a0, -1
+; CHECK-V-NEXT:    addi a0, a0, -1
 ; CHECK-V-NEXT:    vmin.vx v8, v9, a0
 ; CHECK-V-NEXT:    lui a0, 1048568
 ; CHECK-V-NEXT:    vmax.vx v8, v8, a0
@@ -4266,7 +4266,7 @@ define <2 x i16> @utest_f64i16_mm(<2 x double> %x) {
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-V-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; CHECK-V-NEXT:    lui a0, 16
-; CHECK-V-NEXT:    addiw a0, a0, -1
+; CHECK-V-NEXT:    addi a0, a0, -1
 ; CHECK-V-NEXT:    vminu.vx v8, v9, a0
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-V-NEXT:    vnsrl.wi v8, v8, 0
@@ -4306,7 +4306,7 @@ define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-V-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; CHECK-V-NEXT:    lui a0, 16
-; CHECK-V-NEXT:    addiw a0, a0, -1
+; CHECK-V-NEXT:    addi a0, a0, -1
 ; CHECK-V-NEXT:    vmin.vx v8, v9, a0
 ; CHECK-V-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
@@ -4384,7 +4384,7 @@ define <4 x i16> @stest_f32i16_mm(<4 x float> %x) {
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; CHECK-V-NEXT:    lui a0, 8
-; CHECK-V-NEXT:    addiw a0, a0, -1
+; CHECK-V-NEXT:    addi a0, a0, -1
 ; CHECK-V-NEXT:    vmin.vx v8, v8, a0
 ; CHECK-V-NEXT:    lui a0, 1048568
 ; CHECK-V-NEXT:    vmax.vx v8, v8, a0
@@ -4441,7 +4441,7 @@ define <4 x i16> @utest_f32i16_mm(<4 x float> %x) {
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vfcvt.rtz.xu.f.v v8, v8
 ; CHECK-V-NEXT:    lui a0, 16
-; CHECK-V-NEXT:    addiw a0, a0, -1
+; CHECK-V-NEXT:    addi a0, a0, -1
 ; CHECK-V-NEXT:    vminu.vx v8, v8, a0
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-V-NEXT:    vnsrl.wi v8, v8, 0
@@ -4507,7 +4507,7 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vfcvt.rtz.x.f.v v8, v8
 ; CHECK-V-NEXT:    lui a0, 16
-; CHECK-V-NEXT:    addiw a0, a0, -1
+; CHECK-V-NEXT:    addi a0, a0, -1
 ; CHECK-V-NEXT:    vmin.vx v8, v8, a0
 ; CHECK-V-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
@@ -4817,7 +4817,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vslideup.vi v10, v8, 7
 ; CHECK-V-NEXT:    lui a0, 8
-; CHECK-V-NEXT:    addiw a0, a0, -1
+; CHECK-V-NEXT:    addi a0, a0, -1
 ; CHECK-V-NEXT:    vmin.vx v8, v10, a0
 ; CHECK-V-NEXT:    lui a0, 1048568
 ; CHECK-V-NEXT:    vmax.vx v10, v8, a0
@@ -5096,7 +5096,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vslideup.vi v10, v8, 7
 ; CHECK-V-NEXT:    lui a0, 16
-; CHECK-V-NEXT:    addiw a0, a0, -1
+; CHECK-V-NEXT:    addi a0, a0, -1
 ; CHECK-V-NEXT:    vminu.vx v10, v10, a0
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-V-NEXT:    vnsrl.wi v8, v10, 0
@@ -5398,7 +5398,7 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
 ; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vslideup.vi v10, v8, 7
 ; CHECK-V-NEXT:    lui a0, 16
-; CHECK-V-NEXT:    addiw a0, a0, -1
+; CHECK-V-NEXT:    addi a0, a0, -1
 ; CHECK-V-NEXT:    vmin.vx v8, v10, a0
 ; CHECK-V-NEXT:    vmax.vx v10, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
index 82c9f405c23923d..4e08f401ca4e904 100644
--- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
@@ -65,7 +65,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) {
 ; RV64-BITS-UNKNOWN-NEXT:    vmerge.vim v8, v8, 1, v0
 ; RV64-BITS-UNKNOWN-NEXT:    csrr a0, vlenb
 ; RV64-BITS-UNKNOWN-NEXT:    srli a0, a0, 2
-; RV64-BITS-UNKNOWN-NEXT:    addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT:    addi a0, a0, -1
 ; RV64-BITS-UNKNOWN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; RV64-BITS-UNKNOWN-NEXT:    vid.v v9
 ; RV64-BITS-UNKNOWN-NEXT:    vrsub.vx v9, v9, a0
@@ -82,7 +82,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) {
 ; RV64-BITS-256-NEXT:    vmerge.vim v8, v8, 1, v0
 ; RV64-BITS-256-NEXT:    csrr a0, vlenb
 ; RV64-BITS-256-NEXT:    srli a0, a0, 2
-; RV64-BITS-256-NEXT:    addiw a0, a0, -1
+; RV64-BITS-256-NEXT:    addi a0, a0, -1
 ; RV64-BITS-256-NEXT:    vid.v v9
 ; RV64-BITS-256-NEXT:    vrsub.vx v9, v9, a0
 ; RV64-BITS-256-NEXT:    vrgather.vv v10, v8, v9
@@ -97,7 +97,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) {
 ; RV64-BITS-512-NEXT:    vmerge.vim v8, v8, 1, v0
 ; RV64-BITS-512-NEXT:    csrr a0, vlenb
 ; RV64-BITS-512-NEXT:    srli a0, a0, 2
-; RV64-BITS-512-NEXT:    addiw a0, a0, -1
+; RV64-BITS-512-NEXT:    addi a0, a0, -1
 ; RV64-BITS-512-NEXT:    vid.v v9
 ; RV64-BITS-512-NEXT:    vrsub.vx v9, v9, a0
 ; RV64-BITS-512-NEXT:    vrgather.vv v10, v8, v9
@@ -163,7 +163,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) {
 ; RV64-BITS-UNKNOWN-NEXT:    vmerge.vim v8, v8, 1, v0
 ; RV64-BITS-UNKNOWN-NEXT:    csrr a0, vlenb
 ; RV64-BITS-UNKNOWN-NEXT:    srli a0, a0, 1
-; RV64-BITS-UNKNOWN-NEXT:    addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT:    addi a0, a0, -1
 ; RV64-BITS-UNKNOWN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; RV64-BITS-UNKNOWN-NEXT:    vid.v v9
 ; RV64-BITS-UNKNOWN-NEXT:    vrsub.vx v9, v9, a0
@@ -180,7 +180,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) {
 ; RV64-BITS-256-NEXT:    vmerge.vim v8, v8, 1, v0
 ; RV64-BITS-256-NEXT:    csrr a0, vlenb
 ; RV64-BITS-256-NEXT:    srli a0, a0, 1
-; RV64-BITS-256-NEXT:    addiw a0, a0, -1
+; RV64-BITS-256-NEXT:    addi a0, a0, -1
 ; RV64-BITS-256-NEXT:    vid.v v9
 ; RV64-BITS-256-NEXT:    vrsub.vx v9, v9, a0
 ; RV64-BITS-256-NEXT:    vrgather.vv v10, v8, v9
@@ -195,7 +195,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) {
 ; RV64-BITS-512-NEXT:    vmerge.vim v8, v8, 1, v0
 ; RV64-BITS-512-NEXT:    csrr a0, vlenb
 ; RV64-BITS-512-NEXT:    srli a0, a0, 1
-; RV64-BITS-512-NEXT:    addiw a0, a0, -1
+; RV64-BITS-512-NEXT:    addi a0, a0, -1
 ; RV64-BITS-512-NEXT:    vid.v v9
 ; RV64-BITS-512-NEXT:    vrsub.vx v9, v9, a0
 ; RV64-BITS-512-NEXT:    vrgather.vv v10, v8, v9
@@ -257,7 +257,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
 ; RV64-BITS-UNKNOWN-NEXT:    vmv.v.i v8, 0
 ; RV64-BITS-UNKNOWN-NEXT:    vmerge.vim v8, v8, 1, v0
 ; RV64-BITS-UNKNOWN-NEXT:    csrr a0, vlenb
-; RV64-BITS-UNKNOWN-NEXT:    addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT:    addi a0, a0, -1
 ; RV64-BITS-UNKNOWN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-BITS-UNKNOWN-NEXT:    vid.v v10
 ; RV64-BITS-UNKNOWN-NEXT:    vrsub.vx v10, v10, a0
@@ -273,7 +273,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
 ; RV64-BITS-256-NEXT:    vmv.v.i v8, 0
 ; RV64-BITS-256-NEXT:    vmerge.vim v8, v8, 1, v0
 ; RV64-BITS-256-NEXT:    csrr a0, vlenb
-; RV64-BITS-256-NEXT:    addiw a0, a0, -1
+; RV64-BITS-256-NEXT:    addi a0, a0, -1
 ; RV64-BITS-256-NEXT:    vid.v v9
 ; RV64-BITS-256-NEXT:    vrsub.vx v9, v9, a0
 ; RV64-BITS-256-NEXT:    vrgather.vv v10, v8, v9
@@ -287,7 +287,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
 ; RV64-BITS-512-NEXT:    vmv.v.i v8, 0
 ; RV64-BITS-512-NEXT:    vmerge.vim v8, v8, 1, v0
 ; RV64-BITS-512-NEXT:    csrr a0, vlenb
-; RV64-BITS-512-NEXT:    addiw a0, a0, -1
+; RV64-BITS-512-NEXT:    addi a0, a0, -1
 ; RV64-BITS-512-NEXT:    vid.v v9
 ; RV64-BITS-512-NEXT:    vrsub.vx v9, v9, a0
 ; RV64-BITS-512-NEXT:    vrgather.vv v10, v8, v9
@@ -353,7 +353,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
 ; RV64-BITS-UNKNOWN-NEXT:    vmerge.vim v8, v8, 1, v0
 ; RV64-BITS-UNKNOWN-NEXT:    csrr a0, vlenb
 ; RV64-BITS-UNKNOWN-NEXT:    slli a0, a0, 1
-; RV64-BITS-UNKNOWN-NEXT:    addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT:    addi a0, a0, -1
 ; RV64-BITS-UNKNOWN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; RV64-BITS-UNKNOWN-NEXT:    vid.v v12
 ; RV64-BITS-UNKNOWN-NEXT:    vrsub.vx v12, v12, a0
@@ -370,7 +370,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
 ; RV64-BITS-256-NEXT:    vmerge.vim v8, v8, 1, v0
 ; RV64-BITS-256-NEXT:    csrr a0, vlenb
 ; RV64-BITS-256-NEXT:    slli a0, a0, 1
-; RV64-BITS-256-NEXT:    addiw a0, a0, -1
+; RV64-BITS-256-NEXT:    addi a0, a0, -1
 ; RV64-BITS-256-NEXT:    vid.v v10
 ; RV64-BITS-256-NEXT:    vrsub.vx v10, v10, a0
 ; RV64-BITS-256-NEXT:    vrgather.vv v12, v8, v10
@@ -385,7 +385,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
 ; RV64-BITS-512-NEXT:    vmerge.vim v8, v8, 1, v0
 ; RV64-BITS-512-NEXT:    csrr a0, vlenb
 ; RV64-BITS-512-NEXT:    slli a0, a0, 1
-; RV64-BITS-512-NEXT:    addiw a0, a0, -1
+; RV64-BITS-512-NEXT:    addi a0, a0, -1
 ; RV64-BITS-512-NEXT:    vid.v v10
 ; RV64-BITS-512-NEXT:    vrsub.vx v10, v10, a0
 ; RV64-BITS-512-NEXT:    vrgather.vv v12, v8, v10
@@ -451,7 +451,7 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
 ; RV64-BITS-UNKNOWN-NEXT:    vmerge.vim v8, v8, 1, v0
 ; RV64-BITS-UNKNOWN-NEXT:    csrr a0, vlenb
 ; RV64-BITS-UNKNOWN-NEXT:    slli a0, a0, 2
-; RV64-BITS-UNKNOWN-NEXT:    addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT:    addi a0, a0, -1
 ; RV64-BITS-UNKNOWN-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
 ; RV64-BITS-UNKNOWN-NEXT:    vid.v v16
 ; RV64-BITS-UNKNOWN-NEXT:    vrsub.vx v16, v16, a0
@@ -468,7 +468,7 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
 ; RV64-BITS-256-NEXT:    vmerge.vim v8, v8, 1, v0
 ; RV64-BITS-256-NEXT:    csrr a0, vlenb
 ; RV64-BITS-256-NEXT:    slli a0, a0, 2
-; RV64-BITS-256-NEXT:    addiw a0, a0, -1
+; RV64-BITS-256-NEXT:    addi a0, a0, -1
 ; RV64-BITS-256-NEXT:    vid.v v12
 ; RV64-BITS-256-NEXT:    vrsub.vx v12, v12, a0
 ; RV64-BITS-256-NEXT:    vrgather.vv v16, v8, v12
@@ -483,7 +483,7 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
 ; RV64-BITS-512-NEXT:    vmerge.vim v8, v8, 1, v0
 ; RV64-BITS-512-NEXT:    csrr a0, vlenb
 ; RV64-BITS-512-NEXT:    slli a0, a0, 2
-; RV64-BITS-512-NEXT:    addiw a0, a0, -1
+; RV64-BITS-512-NEXT:    addi a0, a0, -1
 ; RV64-BITS-512-NEXT:    vid.v v12
 ; RV64-BITS-512-NEXT:    vrsub.vx v12, v12, a0
 ; RV64-BITS-512-NEXT:    vrgather.vv v16, v8, v12
@@ -552,7 +552,7 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
 ; RV64-BITS-UNKNOWN:       # %bb.0:
 ; RV64-BITS-UNKNOWN-NEXT:    csrr a0, vlenb
 ; RV64-BITS-UNKNOWN-NEXT:    slli a0, a0, 2
-; RV64-BITS-UNKNOWN-NEXT:    addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT:    addi a0, a0, -1
 ; RV64-BITS-UNKNOWN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
 ; RV64-BITS-UNKNOWN-NEXT:    vid.v v8
 ; RV64-BITS-UNKNOWN-NEXT:    vrsub.vx v8, v8, a0
@@ -574,7 +574,7 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
 ; RV64-BITS-256-NEXT:    vmerge.vim v8, v8, 1, v0
 ; RV64-BITS-256-NEXT:    csrr a0, vlenb
 ; RV64-BITS-256-NEXT:    slli a0, a0, 3
-; RV64-BITS-256-NEXT:    addiw a0, a0, -1
+; RV64-BITS-256-NEXT:    addi a0, a0, -1
 ; RV64-BITS-256-NEXT:    vid.v v16
 ; RV64-BITS-256-NEXT:    vrsub.vx v16, v16, a0
 ; RV64-BITS-256-NEXT:    vrgather.vv v24, v8, v16
@@ -586,7 +586,7 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
 ; RV64-BITS-512:       # %bb.0:
 ; RV64-BITS-512-NEXT:    csrr a0, vlenb
 ; RV64-BITS-512-NEXT:    slli a0, a0, 2
-; RV64-BITS-512-NEXT:    addiw a0, a0, -1
+; RV64-BITS-512-NEXT:    addi a0, a0, -1
 ; RV64-BITS-512-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; RV64-BITS-512-NEXT:    vid.v v8
 ; RV64-BITS-512-NEXT:    vrsub.vx v8, v8, a0
@@ -650,7 +650,7 @@ define <vscale x 1 x i8> @reverse_nxv1i8(<vscale x 1 x i8> %a) {
 ; RV64-BITS-UNKNOWN:       # %bb.0:
 ; RV64-BITS-UNKNOWN-NEXT:    csrr a0, vlenb
 ; RV64-BITS-UNKNOWN-NEXT:    srli a0, a0, 3
-; RV64-BITS-UNKNOWN-NEXT:    addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT:    addi a0, a0, -1
 ; RV64-BITS-UNKNOWN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; RV64-BITS-UNKNOWN-NEXT:    vid.v v9
 ; RV64-BITS-UNKNOWN-NEXT:    vrsub.vx v10, v9, a0
@@ -663,7 +663,7 @@ define <vscale x 1 x i8> @reverse_nxv1i8(<vscale x 1 x i8> %a) {
 ; RV64-BITS-256:       # %bb.0:
 ; RV64-BITS-256-NEXT:    csrr a0, vlenb
 ; RV64-BITS-256-NEXT:    srli a0, a0, 3
-; RV64-BITS-256-NEXT:    addiw a0, a0, -1
+; RV64-BITS-256-NEXT:    addi a0, a0, -1
 ; RV64-BITS-256-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
 ; RV64-BITS-256-NEXT:    vid.v v9
 ; RV64-BITS-256-NEXT:    vrsub.vx v10, v9, a0
@@ -675,7 +675,7 @@ define <vscale x 1 x i8> @reverse_nxv1i8(<vscale x 1 x i8> %a) {
 ; RV64-BITS-512:       # %bb.0:
 ; RV64-BITS-512-NEXT:    csrr a0, vlenb
 ; RV64-BITS-512-NEXT:    srli a0, a0, 3
-; RV64-BITS-512-NEXT:    addiw a0, a0, -1
+; RV64-BITS-512-NEXT:    addi a0, a0, -1
 ; RV64-BITS-512-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
 ; RV64-BITS-512-NEXT:    vid.v v9
 ; RV64-BITS-512-NEXT:    vrsub.vx v10, v9, a0
@@ -728,7 +728,7 @@ define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) {
 ; RV64-BITS-UNKNOWN:       # %bb.0:
 ; RV64-BITS-UNKNOWN-NEXT:    csrr a0, vlenb
 ; RV64-BITS-UNKNOWN-NEXT:    srli a0, a0, 2
-; RV64-BITS-UNKNOWN-NEXT:    addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT:    addi a0, a0, -1
 ; RV64-BITS-UNKNOWN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; RV64-BITS-UNKNOWN-NEXT:    vid.v v9
 ; RV64-BITS-UNKNOWN-NEXT:    vrsub.vx v10, v9, a0
@@ -741,7 +741,7 @@ define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) {
 ; RV64-BITS-256:       # %bb.0:
 ; RV64-BITS-256-NEXT:    csrr a0, vlenb
 ; RV64-BITS-256-NEXT:    srli a0, a0, 2
-; RV64-BITS-256-NEXT:    addiw a0, a0, -1
+; RV64-BITS-256-NEXT:    addi a0, a0, -1
 ; RV64-BITS-256-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; RV64-BITS-256-NEXT:    vid.v v9
 ; RV64-BITS-256-NEXT:    vrsub.vx v10, v9, a0
@@ -753,7 +753,7 @@ define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) {
 ; RV64-BITS-512:       # %bb.0:
 ; RV64-BITS-512-NEXT:    csrr a0, vlenb
 ; RV64-BITS-512-NEXT:    srli a0, a0, 2
-; RV64-BITS-512-NEXT:    addiw a0, a0, -1
+; RV64-BITS-512-NEXT:    addi a0, a0, -1
 ; RV64-BITS-512-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; RV64-BITS-512-NEXT:    vid.v v9
 ; RV64-BITS-512-NEXT:    vrsub.vx v10, v9, a0
@@ -806,7 +806,7 @@ define <vscale x 4 x i8> @reverse_nxv4i8(<vscale x 4 x i8> %a) {
 ; RV64-BITS-UNKNOWN:       # %bb.0:
 ; RV64-BITS-UNKNOWN-NEXT:    csrr a0, vlenb
 ; RV64-BITS-UNKNOWN-NEXT:    srli a0, a0, 1
-; RV64-BITS-UNKNOWN-NEXT:    addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT:    addi a0, a0, -1
 ; RV64-BITS-UNKNOWN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; RV64-BITS-UNKNOWN-NEXT:    vid.v v9
 ; RV64-BITS-UNKNOWN-NEXT:    vrsub.vx v10, v9, a0
@@ -819,7 +819,7 @@ define <vscale x 4 x i8> @reverse_nxv4i8(<vscale x 4 x i8> %a) {
 ; RV64-BITS-256:       # %bb.0:
 ; RV64-BITS-256-NEXT:    csrr a0, vlenb
 ; RV64-BITS-256-NEXT:    srli a0, a0, 1
-; RV64-BITS-256-NEXT:    addiw a0, a0, -1
+; RV64-BITS-256-NEXT:    addi a0, a0, -1
 ; RV64-BITS-256-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; RV64-BITS-256-NEXT:    vid.v v9
 ; RV64-BITS-256-NEXT:    vrsub.vx v10, v9, a0
@@ -831,7 +831,7 @@ define <vscale x 4 x i8> @reverse_nxv4i8(<vscale x 4 x i8> %a) {
 ; RV64-BITS-512:       # %bb.0:
 ; RV64-BITS-512-NEXT:    csrr a0, vlenb
 ; RV64-BITS-512-NEXT:    srli a0, a0, 1
-; RV64-BITS-512-NEXT:    addiw a0, a0, -1
+; RV64-BITS-512-NEXT:    addi a0, a0, -1
 ; RV64-BITS-512-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; RV64-BITS-512-NEXT:    vid.v v9
 ; RV64-BITS-512-NEXT:    vrsub.vx v10, v9, a0
@@ -880,7 +880,7 @@ define <vscale x 8 x i8> @reverse_nxv8i8(<vscale x 8 x i8> %a) {
 ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv8i8:
 ; RV64-BITS-UNKNOWN:       # %bb.0:
 ; RV64-BITS-UNKNOWN-NEXT:    csrr a0, vlenb
-; RV64-BITS-UNKNOWN-NEXT:    addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT:    addi a0, a0, -1
 ; RV64-BITS-UNKNOWN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; RV64-BITS-UNKNOWN-NEXT:    vid.v v10
 ; RV64-BITS-UNKNOWN-NEXT:    vrsub.vx v10, v10, a0
@@ -892,7 +892,7 @@ define <vscale x 8 x i8> @reverse_nxv8i8(<vscale x 8 x i8> %a) {
 ; RV64-BITS-256-LABEL: reverse_nxv8i8:
 ; RV64-BITS-256:       # %bb.0:
 ; RV64-BITS-256-NEXT:    csrr a0, vlenb
-; RV64-BITS-256-NEXT:    addiw a0, a0, -1
+; RV64-BITS-256-NEXT:    addi a0, a0, -1
 ; RV64-BITS-256-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; RV64-BITS-256-NEXT:    vid.v v9
 ; RV64-BITS-256-NEXT:    vrsub.vx v10, v9, a0
@@ -903,7 +903,7 @@ define <vscale x 8 x i8> @reverse_nxv8i8(<vscale x 8 x i8> %a) {
 ; RV64-BITS-512-LABEL: reverse_nxv8i8:
 ; RV64-BITS-512:       # %bb.0:
 ; RV64-BITS-512-NEXT:    csrr a0, vlenb
-; RV64-BITS-512-NEXT:    addiw a0, a0, -1
+; RV64-BITS-512-NEXT:    addi a0, a0, -1
 ; RV64-BITS-512-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; RV64-BITS-512-NEXT:    vid.v v9
 ; RV64-BITS-512-NEXT:    vrsub.vx v10, v9, a0
@@ -956,7 +956,7 @@ define <vscale x 16 x i8> @reverse_nxv16i8(<vscale x 16 x i8> %a) {
 ; RV64-BITS-UNKNOWN:       # %bb.0:
 ; RV64-BITS-UNKNOWN-NEXT:    csrr a0, vlenb
 ; RV64-BITS-UNKNOWN-NEXT:    slli a0, a0, 1
-; RV64-BITS-UNKNOWN-NEXT:    addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT:    addi a0, a0, -1
 ; RV64-BITS-UNKNOWN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; RV64-BITS-UNKNOWN-NEXT:    vid.v v12
 ; RV64-BITS-UNKNOWN-NEXT:    vrsub.vx v12, v12, a0
@@ -969,7 +969,7 @@ define <vscale x 16 x i8> @reverse_nxv16i8(<vscale x 16 x i8> %a) {
 ; RV64-BITS-256:       # %bb.0:
 ; RV64-BITS-256-NEXT:    csrr a0, vlenb
 ; RV64-BITS-256-NEXT:    slli a0, a0, 1
-; RV64-BITS-256-NEXT:    addiw a0, a0, -1
+; RV64-BITS-256-NEXT:    addi a0, a0, -1
 ; RV64-BITS-256-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
 ; RV64-BITS-256-NEXT:    vid.v v10
 ; RV64-BITS-256-NEXT:    vrsub.vx v12, v10, a0
@@ -981,7 +981,7 @@ define <vscale x 16 x i8> @reverse_nxv16i8(<vscale x 16 x i8> %a) {
 ; RV64-BITS-512:       # %bb.0:
 ; RV64-BITS-512-NEXT:    csrr a0, vlenb
 ; RV64-BITS-512-NEXT:    slli a0, a0, 1
-; RV64-BITS-512-NEXT:    addiw a0, a0, -1
+; RV64-BITS-512-NEXT:    addi a0, a0, -1
 ; RV64-BITS-512-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
 ; RV64-BITS-512-NEXT:    vid.v v10
 ; RV64-BITS-512-NEXT:    vrsub.vx v12, v10, a0
@@ -1034,7 +1034,7 @@ define <vscale x 32 x i8> @reverse_nxv32i8(<vscale x 32 x i8> %a) {
 ; RV64-BITS-UNKNOWN:       # %bb.0:
 ; RV64-BITS-UNKNOWN-NEXT:    csrr a0, vlenb
 ; RV64-BITS-UNKNOWN-NEXT:    slli a0, a0, 2
-; RV64-BITS-UNKNOWN-NEXT:    addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT:    addi a0, a0, -1
 ; RV64-BITS-UNKNOWN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
 ; RV64-BITS-UNKNOWN-NEXT:    vid.v v16
 ; RV64-BITS-UNKNOWN-NEXT:    vrsub.vx v16, v16, a0
@@ -1047,7 +1047,7 @@ define <vscale x 32 x i8> @reverse_nxv32i8(<vscale x 32 x i8> %a) {
 ; RV64-BITS-256:       # %bb.0:
 ; RV64-BITS-256-NEXT:    csrr a0, vlenb
 ; RV64-BITS-256-NEXT:    slli a0, a0, 2
-; RV64-BITS-256-NEXT:    addiw a0, a0, -1
+; RV64-BITS-256-NEXT:    addi a0, a0, -1
 ; RV64-BITS-256-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; RV64-BITS-256-NEXT:    vid.v v12
 ; RV64-BITS-256-NEXT:    vrsub.vx v16, v12, a0
@@ -1059,7 +1059,7 @@ define <vscale x 32 x i8> @reverse_nxv32i8(<vscale x 32 x i8> %a) {
 ; RV64-BITS-512:       # %bb.0:
 ; RV64-BITS-512-NEXT:    csrr a0, vlenb
 ; RV64-BITS-512-NEXT:    slli a0, a0, 2
-; RV64-BITS-512-NEXT:    addiw a0, a0, -1
+; RV64-BITS-512-NEXT:    addi a0, a0, -1
 ; RV64-BITS-512-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; RV64-BITS-512-NEXT:    vid.v v12
 ; RV64-BITS-512-NEXT:    vrsub.vx v16, v12, a0
@@ -1114,7 +1114,7 @@ define <vscale x 64 x i8> @reverse_nxv64i8(<vscale x 64 x i8> %a) {
 ; RV64-BITS-UNKNOWN:       # %bb.0:
 ; RV64-BITS-UNKNOWN-NEXT:    csrr a0, vlenb
 ; RV64-BITS-UNKNOWN-NEXT:    slli a0, a0, 2
-; RV64-BITS-UNKNOWN-NEXT:    addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT:    addi a0, a0, -1
 ; RV64-BITS-UNKNOWN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
 ; RV64-BITS-UNKNOWN-NEXT:    vid.v v16
 ; RV64-BITS-UNKNOWN-NEXT:    vrsub.vx v24, v16, a0
@@ -1128,7 +1128,7 @@ define <vscale x 64 x i8> @reverse_nxv64i8(<vscale x 64 x i8> %a) {
 ; RV64-BITS-256:       # %bb.0:
 ; RV64-BITS-256-NEXT:    csrr a0, vlenb
 ; RV64-BITS-256-NEXT:    slli a0, a0, 3
-; RV64-BITS-256-NEXT:    addiw a0, a0, -1
+; RV64-BITS-256-NEXT:    addi a0, a0, -1
 ; RV64-BITS-256-NEXT:    vsetvli a1, zero, e8, m8, ta, ma
 ; RV64-BITS-256-NEXT:    vid.v v16
 ; RV64-BITS-256-NEXT:    vrsub.vx v24, v16, a0
@@ -1140,7 +1140,7 @@ define <vscale x 64 x i8> @reverse_nxv64i8(<vscale x 64 x i8> %a) {
 ; RV64-BITS-512:       # %bb.0:
 ; RV64-BITS-512-NEXT:    csrr a0, vlenb
 ; RV64-BITS-512-NEXT:    slli a0, a0, 2
-; RV64-BITS-512-NEXT:    addiw a0, a0, -1
+; RV64-BITS-512-NEXT:    addi a0, a0, -1
 ; RV64-BITS-512-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; RV64-BITS-512-NEXT:    vid.v v16
 ; RV64-BITS-512-NEXT:    vrsub.vx v24, v16, a0
@@ -1153,305 +1153,175 @@ define <vscale x 64 x i8> @reverse_nxv64i8(<vscale x 64 x i8> %a) {
 }
 
 define <vscale x 1 x i16> @reverse_nxv1i16(<vscale x 1 x i16> %a) {
-; RV32-LABEL: reverse_nxv1i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    srli a0, a0, 3
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vid.v v9
-; RV32-NEXT:    vrsub.vx v10, v9, a0
-; RV32-NEXT:    vrgather.vv v9, v8, v10
-; RV32-NEXT:    vmv1r.v v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv1i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    srli a0, a0, 3
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vid.v v9
-; RV64-NEXT:    vrsub.vx v10, v9, a0
-; RV64-NEXT:    vrgather.vv v9, v8, v10
-; RV64-NEXT:    vmv1r.v v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a0
+; CHECK-NEXT:    vrgather.vv v9, v8, v10
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
   %res = call <vscale x 1 x i16> @llvm.experimental.vector.reverse.nxv1i16(<vscale x 1 x i16> %a)
   ret <vscale x 1 x i16> %res
 }
 
 define <vscale x 2 x i16> @reverse_nxv2i16(<vscale x 2 x i16> %a) {
-; RV32-LABEL: reverse_nxv2i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    srli a0, a0, 2
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vid.v v9
-; RV32-NEXT:    vrsub.vx v10, v9, a0
-; RV32-NEXT:    vrgather.vv v9, v8, v10
-; RV32-NEXT:    vmv1r.v v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv2i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    srli a0, a0, 2
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vid.v v9
-; RV64-NEXT:    vrsub.vx v10, v9, a0
-; RV64-NEXT:    vrgather.vv v9, v8, v10
-; RV64-NEXT:    vmv1r.v v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a0
+; CHECK-NEXT:    vrgather.vv v9, v8, v10
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i16> @llvm.experimental.vector.reverse.nxv2i16(<vscale x 2 x i16> %a)
   ret <vscale x 2 x i16> %res
 }
 
 define <vscale x 4 x i16> @reverse_nxv4i16(<vscale x 4 x i16> %a) {
-; RV32-LABEL: reverse_nxv4i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    srli a0, a0, 1
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV32-NEXT:    vid.v v9
-; RV32-NEXT:    vrsub.vx v10, v9, a0
-; RV32-NEXT:    vrgather.vv v9, v8, v10
-; RV32-NEXT:    vmv.v.v v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv4i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    srli a0, a0, 1
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV64-NEXT:    vid.v v9
-; RV64-NEXT:    vrsub.vx v10, v9, a0
-; RV64-NEXT:    vrgather.vv v9, v8, v10
-; RV64-NEXT:    vmv.v.v v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a0
+; CHECK-NEXT:    vrgather.vv v9, v8, v10
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i16> @llvm.experimental.vector.reverse.nxv4i16(<vscale x 4 x i16> %a)
   ret <vscale x 4 x i16> %res
 }
 
 define <vscale x 8 x i16> @reverse_nxv8i16(<vscale x 8 x i16> %a) {
-; RV32-LABEL: reverse_nxv8i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; RV32-NEXT:    vid.v v10
-; RV32-NEXT:    vrsub.vx v12, v10, a0
-; RV32-NEXT:    vrgather.vv v10, v8, v12
-; RV32-NEXT:    vmv.v.v v8, v10
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv8i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; RV64-NEXT:    vid.v v10
-; RV64-NEXT:    vrsub.vx v12, v10, a0
-; RV64-NEXT:    vrgather.vv v10, v8, v12
-; RV64-NEXT:    vmv.v.v v8, v10
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vrsub.vx v12, v10, a0
+; CHECK-NEXT:    vrgather.vv v10, v8, v12
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> %a)
   ret <vscale x 8 x i16> %res
 }
 
 define <vscale x 16 x i16> @reverse_nxv16i16(<vscale x 16 x i16> %a) {
-; RV32-LABEL: reverse_nxv16i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; RV32-NEXT:    vid.v v12
-; RV32-NEXT:    vrsub.vx v16, v12, a0
-; RV32-NEXT:    vrgather.vv v12, v8, v16
-; RV32-NEXT:    vmv.v.v v8, v12
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv16i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; RV64-NEXT:    vid.v v12
-; RV64-NEXT:    vrsub.vx v16, v12, a0
-; RV64-NEXT:    vrgather.vv v12, v8, v16
-; RV64-NEXT:    vmv.v.v v8, v12
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    vrsub.vx v16, v12, a0
+; CHECK-NEXT:    vrgather.vv v12, v8, v16
+; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16> %a)
   ret <vscale x 16 x i16> %res
 }
 
 define <vscale x 32 x i16> @reverse_nxv32i16(<vscale x 32 x i16> %a) {
-; RV32-LABEL: reverse_nxv32i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; RV32-NEXT:    vid.v v16
-; RV32-NEXT:    vrsub.vx v24, v16, a0
-; RV32-NEXT:    vrgather.vv v16, v8, v24
-; RV32-NEXT:    vmv.v.v v8, v16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv32i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; RV64-NEXT:    vid.v v16
-; RV64-NEXT:    vrsub.vx v24, v16, a0
-; RV64-NEXT:    vrgather.vv v16, v8, v24
-; RV64-NEXT:    vmv.v.v v8, v16
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vrsub.vx v24, v16, a0
+; CHECK-NEXT:    vrgather.vv v16, v8, v24
+; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    ret
   %res = call <vscale x 32 x i16> @llvm.experimental.vector.reverse.nxv32i16(<vscale x 32 x i16> %a)
   ret <vscale x 32 x i16> %res
 }
 
 define <vscale x 1 x i32> @reverse_nxv1i32(<vscale x 1 x i32> %a) {
-; RV32-LABEL: reverse_nxv1i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    srli a0, a0, 3
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vid.v v9
-; RV32-NEXT:    vrsub.vx v10, v9, a0
-; RV32-NEXT:    vrgather.vv v9, v8, v10
-; RV32-NEXT:    vmv1r.v v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv1i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    srli a0, a0, 3
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vid.v v9
-; RV64-NEXT:    vrsub.vx v10, v9, a0
-; RV64-NEXT:    vrgather.vv v9, v8, v10
-; RV64-NEXT:    vmv1r.v v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a0
+; CHECK-NEXT:    vrgather.vv v9, v8, v10
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
   %res = call <vscale x 1 x i32> @llvm.experimental.vector.reverse.nxv1i32(<vscale x 1 x i32> %a)
   ret <vscale x 1 x i32> %res
 }
 
 define <vscale x 2 x i32> @reverse_nxv2i32(<vscale x 2 x i32> %a) {
-; RV32-LABEL: reverse_nxv2i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    srli a0, a0, 2
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT:    vid.v v9
-; RV32-NEXT:    vrsub.vx v10, v9, a0
-; RV32-NEXT:    vrgather.vv v9, v8, v10
-; RV32-NEXT:    vmv.v.v v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv2i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    srli a0, a0, 2
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT:    vid.v v9
-; RV64-NEXT:    vrsub.vx v10, v9, a0
-; RV64-NEXT:    vrgather.vv v9, v8, v10
-; RV64-NEXT:    vmv.v.v v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a0
+; CHECK-NEXT:    vrgather.vv v9, v8, v10
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i32> @llvm.experimental.vector.reverse.nxv2i32(<vscale x 2 x i32> %a)
   ret <vscale x 2 x i32> %res
 }
 
 define <vscale x 4 x i32> @reverse_nxv4i32(<vscale x 4 x i32> %a) {
-; RV32-LABEL: reverse_nxv4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    srli a0, a0, 1
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT:    vid.v v10
-; RV32-NEXT:    vrsub.vx v12, v10, a0
-; RV32-NEXT:    vrgather.vv v10, v8, v12
-; RV32-NEXT:    vmv.v.v v8, v10
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    srli a0, a0, 1
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT:    vid.v v10
-; RV64-NEXT:    vrsub.vx v12, v10, a0
-; RV64-NEXT:    vrgather.vv v10, v8, v12
-; RV64-NEXT:    vmv.v.v v8, v10
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vrsub.vx v12, v10, a0
+; CHECK-NEXT:    vrgather.vv v10, v8, v12
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
   ret <vscale x 4 x i32> %res
 }
 
 define <vscale x 8 x i32> @reverse_nxv8i32(<vscale x 8 x i32> %a) {
-; RV32-LABEL: reverse_nxv8i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT:    vid.v v12
-; RV32-NEXT:    vrsub.vx v16, v12, a0
-; RV32-NEXT:    vrgather.vv v12, v8, v16
-; RV32-NEXT:    vmv.v.v v8, v12
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv8i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT:    vid.v v12
-; RV64-NEXT:    vrsub.vx v16, v12, a0
-; RV64-NEXT:    vrgather.vv v12, v8, v16
-; RV64-NEXT:    vmv.v.v v8, v12
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    vrsub.vx v16, v12, a0
+; CHECK-NEXT:    vrgather.vv v12, v8, v16
+; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> %a)
   ret <vscale x 8 x i32> %res
 }
 
 define <vscale x 16 x i32> @reverse_nxv16i32(<vscale x 16 x i32> %a) {
-; RV32-LABEL: reverse_nxv16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT:    vid.v v16
-; RV32-NEXT:    vrsub.vx v24, v16, a0
-; RV32-NEXT:    vrgather.vv v16, v8, v24
-; RV32-NEXT:    vmv.v.v v8, v16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV64-NEXT:    vid.v v16
-; RV64-NEXT:    vrsub.vx v24, v16, a0
-; RV64-NEXT:    vrgather.vv v16, v8, v24
-; RV64-NEXT:    vmv.v.v v8, v16
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vrsub.vx v24, v16, a0
+; CHECK-NEXT:    vrgather.vv v16, v8, v24
+; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i32> @llvm.experimental.vector.reverse.nxv16i32(<vscale x 16 x i32> %a)
   ret <vscale x 16 x i32> %res
 }
@@ -1524,305 +1394,175 @@ define <vscale x 8 x i64> @reverse_nxv8i64(<vscale x 8 x i64> %a) {
 ;
 
 define <vscale x 1 x half> @reverse_nxv1f16(<vscale x 1 x half> %a) {
-; RV32-LABEL: reverse_nxv1f16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    srli a0, a0, 3
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vid.v v9
-; RV32-NEXT:    vrsub.vx v10, v9, a0
-; RV32-NEXT:    vrgather.vv v9, v8, v10
-; RV32-NEXT:    vmv1r.v v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv1f16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    srli a0, a0, 3
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vid.v v9
-; RV64-NEXT:    vrsub.vx v10, v9, a0
-; RV64-NEXT:    vrgather.vv v9, v8, v10
-; RV64-NEXT:    vmv1r.v v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv1f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a0
+; CHECK-NEXT:    vrgather.vv v9, v8, v10
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
   %res = call <vscale x 1 x half> @llvm.experimental.vector.reverse.nxv1f16(<vscale x 1 x half> %a)
   ret <vscale x 1 x half> %res
 }
 
 define <vscale x 2 x half> @reverse_nxv2f16(<vscale x 2 x half> %a) {
-; RV32-LABEL: reverse_nxv2f16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    srli a0, a0, 2
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vid.v v9
-; RV32-NEXT:    vrsub.vx v10, v9, a0
-; RV32-NEXT:    vrgather.vv v9, v8, v10
-; RV32-NEXT:    vmv1r.v v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv2f16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    srli a0, a0, 2
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vid.v v9
-; RV64-NEXT:    vrsub.vx v10, v9, a0
-; RV64-NEXT:    vrgather.vv v9, v8, v10
-; RV64-NEXT:    vmv1r.v v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a0
+; CHECK-NEXT:    vrgather.vv v9, v8, v10
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x half> @llvm.experimental.vector.reverse.nxv2f16(<vscale x 2 x half> %a)
   ret <vscale x 2 x half> %res
 }
 
 define <vscale x 4 x half> @reverse_nxv4f16(<vscale x 4 x half> %a) {
-; RV32-LABEL: reverse_nxv4f16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    srli a0, a0, 1
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV32-NEXT:    vid.v v9
-; RV32-NEXT:    vrsub.vx v10, v9, a0
-; RV32-NEXT:    vrgather.vv v9, v8, v10
-; RV32-NEXT:    vmv.v.v v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv4f16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    srli a0, a0, 1
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV64-NEXT:    vid.v v9
-; RV64-NEXT:    vrsub.vx v10, v9, a0
-; RV64-NEXT:    vrgather.vv v9, v8, v10
-; RV64-NEXT:    vmv.v.v v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a0
+; CHECK-NEXT:    vrgather.vv v9, v8, v10
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x half> @llvm.experimental.vector.reverse.nxv4f16(<vscale x 4 x half> %a)
   ret <vscale x 4 x half> %res
 }
 
 define <vscale x 8 x half> @reverse_nxv8f16(<vscale x 8 x half> %a) {
-; RV32-LABEL: reverse_nxv8f16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; RV32-NEXT:    vid.v v10
-; RV32-NEXT:    vrsub.vx v12, v10, a0
-; RV32-NEXT:    vrgather.vv v10, v8, v12
-; RV32-NEXT:    vmv.v.v v8, v10
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv8f16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; RV64-NEXT:    vid.v v10
-; RV64-NEXT:    vrsub.vx v12, v10, a0
-; RV64-NEXT:    vrgather.vv v10, v8, v12
-; RV64-NEXT:    vmv.v.v v8, v10
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vrsub.vx v12, v10, a0
+; CHECK-NEXT:    vrgather.vv v10, v8, v12
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> %a)
   ret <vscale x 8 x half> %res
 }
 
 define <vscale x 16 x half> @reverse_nxv16f16(<vscale x 16 x half> %a) {
-; RV32-LABEL: reverse_nxv16f16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; RV32-NEXT:    vid.v v12
-; RV32-NEXT:    vrsub.vx v16, v12, a0
-; RV32-NEXT:    vrgather.vv v12, v8, v16
-; RV32-NEXT:    vmv.v.v v8, v12
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv16f16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; RV64-NEXT:    vid.v v12
-; RV64-NEXT:    vrsub.vx v16, v12, a0
-; RV64-NEXT:    vrgather.vv v12, v8, v16
-; RV64-NEXT:    vmv.v.v v8, v12
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    vrsub.vx v16, v12, a0
+; CHECK-NEXT:    vrgather.vv v12, v8, v16
+; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    ret
   %res = call <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half> %a)
   ret <vscale x 16 x half> %res
 }
 
 define <vscale x 32 x half> @reverse_nxv32f16(<vscale x 32 x half> %a) {
-; RV32-LABEL: reverse_nxv32f16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 2
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; RV32-NEXT:    vid.v v16
-; RV32-NEXT:    vrsub.vx v24, v16, a0
-; RV32-NEXT:    vrgather.vv v16, v8, v24
-; RV32-NEXT:    vmv.v.v v8, v16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv32f16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 2
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; RV64-NEXT:    vid.v v16
-; RV64-NEXT:    vrsub.vx v24, v16, a0
-; RV64-NEXT:    vrgather.vv v16, v8, v24
-; RV64-NEXT:    vmv.v.v v8, v16
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv32f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vrsub.vx v24, v16, a0
+; CHECK-NEXT:    vrgather.vv v16, v8, v24
+; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    ret
   %res = call <vscale x 32 x half> @llvm.experimental.vector.reverse.nxv32f16(<vscale x 32 x half> %a)
   ret <vscale x 32 x half> %res
 }
 
 define <vscale x 1 x float> @reverse_nxv1f32(<vscale x 1 x float> %a) {
-; RV32-LABEL: reverse_nxv1f32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    srli a0, a0, 3
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vid.v v9
-; RV32-NEXT:    vrsub.vx v10, v9, a0
-; RV32-NEXT:    vrgather.vv v9, v8, v10
-; RV32-NEXT:    vmv1r.v v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv1f32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    srli a0, a0, 3
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vid.v v9
-; RV64-NEXT:    vrsub.vx v10, v9, a0
-; RV64-NEXT:    vrgather.vv v9, v8, v10
-; RV64-NEXT:    vmv1r.v v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv1f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a0
+; CHECK-NEXT:    vrgather.vv v9, v8, v10
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
   %res = call <vscale x 1 x float> @llvm.experimental.vector.reverse.nxv1f32(<vscale x 1 x float> %a)
   ret <vscale x 1 x float> %res
 }
 
 define <vscale x 2 x float> @reverse_nxv2f32(<vscale x 2 x float> %a) {
-; RV32-LABEL: reverse_nxv2f32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    srli a0, a0, 2
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT:    vid.v v9
-; RV32-NEXT:    vrsub.vx v10, v9, a0
-; RV32-NEXT:    vrgather.vv v9, v8, v10
-; RV32-NEXT:    vmv.v.v v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv2f32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    srli a0, a0, 2
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT:    vid.v v9
-; RV64-NEXT:    vrsub.vx v10, v9, a0
-; RV64-NEXT:    vrgather.vv v9, v8, v10
-; RV64-NEXT:    vmv.v.v v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a0
+; CHECK-NEXT:    vrgather.vv v9, v8, v10
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
   %res = call <vscale x 2 x float> @llvm.experimental.vector.reverse.nxv2f32(<vscale x 2 x float> %a)
   ret <vscale x 2 x float> %res
 }
 
 define <vscale x 4 x float> @reverse_nxv4f32(<vscale x 4 x float> %a) {
-; RV32-LABEL: reverse_nxv4f32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    srli a0, a0, 1
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT:    vid.v v10
-; RV32-NEXT:    vrsub.vx v12, v10, a0
-; RV32-NEXT:    vrgather.vv v10, v8, v12
-; RV32-NEXT:    vmv.v.v v8, v10
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv4f32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    srli a0, a0, 1
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT:    vid.v v10
-; RV64-NEXT:    vrsub.vx v12, v10, a0
-; RV64-NEXT:    vrgather.vv v10, v8, v12
-; RV64-NEXT:    vmv.v.v v8, v10
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vrsub.vx v12, v10, a0
+; CHECK-NEXT:    vrgather.vv v10, v8, v12
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
   %res = call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
   ret <vscale x 4 x float> %res
 }
 
 define <vscale x 8 x float> @reverse_nxv8f32(<vscale x 8 x float> %a) {
-; RV32-LABEL: reverse_nxv8f32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT:    vid.v v12
-; RV32-NEXT:    vrsub.vx v16, v12, a0
-; RV32-NEXT:    vrgather.vv v12, v8, v16
-; RV32-NEXT:    vmv.v.v v8, v12
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv8f32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT:    vid.v v12
-; RV64-NEXT:    vrsub.vx v16, v12, a0
-; RV64-NEXT:    vrgather.vv v12, v8, v16
-; RV64-NEXT:    vmv.v.v v8, v12
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    vrsub.vx v16, v12, a0
+; CHECK-NEXT:    vrgather.vv v12, v8, v16
+; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    ret
   %res = call <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float> %a)
   ret <vscale x 8 x float> %res
 }
 
 define <vscale x 16 x float> @reverse_nxv16f32(<vscale x 16 x float> %a) {
-; RV32-LABEL: reverse_nxv16f32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT:    vid.v v16
-; RV32-NEXT:    vrsub.vx v24, v16, a0
-; RV32-NEXT:    vrgather.vv v16, v8, v24
-; RV32-NEXT:    vmv.v.v v8, v16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: reverse_nxv16f32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV64-NEXT:    vid.v v16
-; RV64-NEXT:    vrsub.vx v24, v16, a0
-; RV64-NEXT:    vrgather.vv v16, v8, v24
-; RV64-NEXT:    vmv.v.v v8, v16
-; RV64-NEXT:    ret
+; CHECK-LABEL: reverse_nxv16f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vrsub.vx v24, v16, a0
+; CHECK-NEXT:    vrgather.vv v16, v8, v24
+; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    ret
   %res = call <vscale x 16 x float> @llvm.experimental.vector.reverse.nxv16f32(<vscale x 16 x float> %a)
   ret <vscale x 16 x float> %res
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr61561.ll b/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
index 1478e8bfd3c658d..f27edd36116657e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
@@ -14,7 +14,7 @@ define <vscale x 4 x i8> @foo(ptr %p) {
 ; CHECK-NEXT:    lui a0, 4
 ; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    lui a0, 1
-; CHECK-NEXT:    addiw a0, a0, -361
+; CHECK-NEXT:    addi a0, a0, -361
 ; CHECK-NEXT:    vmacc.vx v10, a0, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v10, 15
diff --git a/llvm/test/CodeGen/RISCV/rvv/select-sra.ll b/llvm/test/CodeGen/RISCV/rvv/select-sra.ll
index 6804029eaad7053..10c74a0e81e7eab 100644
--- a/llvm/test/CodeGen/RISCV/rvv/select-sra.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/select-sra.ll
@@ -20,11 +20,11 @@ define <4 x i32> @vselect_of_consts(<4 x i1> %cc) {
 ; RV64-LABEL: vselect_of_consts:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    lui a0, 284280
-; RV64-NEXT:    addiw a0, a0, 291
+; RV64-NEXT:    addi a0, a0, 291
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vmv.v.x v8, a0
 ; RV64-NEXT:    lui a0, 214376
-; RV64-NEXT:    addiw a0, a0, -2030
+; RV64-NEXT:    addi a0, a0, -2030
 ; RV64-NEXT:    vmerge.vxm v8, v8, a0, v0
 ; RV64-NEXT:    ret
   %v = select <4 x i1> %cc, <4 x i32> <i32 878082066, i32 878082066, i32 878082066, i32 878082066>, <4 x i32> <i32 1164411171, i32 1164411171, i32 1164411171, i32 1164411171>
diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
index c91db751561c365..6984f2b3402a76b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
@@ -104,45 +104,25 @@ define <16 x i8> @v16i8(<16 x i8> %a) {
 }
 
 define <32 x i8> @v16i8_2(<16 x i8> %a, <16 x i8> %b) {
-; RV32-LABEL: v16i8_2:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI7_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI7_0)
-; RV32-NEXT:    li a1, 32
-; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RV32-NEXT:    vle8.v v12, (a0)
-; RV32-NEXT:    vmv1r.v v14, v9
-; RV32-NEXT:    vrgather.vv v10, v8, v12
-; RV32-NEXT:    vid.v v8
-; RV32-NEXT:    vrsub.vi v8, v8, 15
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT:    vmv.s.x v0, a0
-; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
-; RV32-NEXT:    vrgather.vv v10, v14, v8, v0.t
-; RV32-NEXT:    vmv.v.v v8, v10
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: v16i8_2:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI7_0)
-; RV64-NEXT:    addi a0, a0, %lo(.LCPI7_0)
-; RV64-NEXT:    li a1, 32
-; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; RV64-NEXT:    vle8.v v12, (a0)
-; RV64-NEXT:    vmv1r.v v14, v9
-; RV64-NEXT:    vrgather.vv v10, v8, v12
-; RV64-NEXT:    vid.v v8
-; RV64-NEXT:    vrsub.vi v8, v8, 15
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT:    vmv.s.x v0, a0
-; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
-; RV64-NEXT:    vrgather.vv v10, v14, v8, v0.t
-; RV64-NEXT:    vmv.v.v v8, v10
-; RV64-NEXT:    ret
+; CHECK-LABEL: v16i8_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI7_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI7_0)
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vmv1r.v v14, v9
+; CHECK-NEXT:    vrgather.vv v10, v8, v12
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vrsub.vi v8, v8, 15
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
+; CHECK-NEXT:    vrgather.vv v10, v14, v8, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
   %v32i8 = shufflevector <16 x i8> %a, <16 x i8> %b,  <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <32 x i8> %v32i8
 }
@@ -248,45 +228,25 @@ define <16 x i16> @v16i16(<16 x i16> %a) {
 }
 
 define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) {
-; RV32-LABEL: v16i16_2:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI15_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI15_0)
-; RV32-NEXT:    li a1, 32
-; RV32-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; RV32-NEXT:    vle16.v v20, (a0)
-; RV32-NEXT:    vmv2r.v v16, v10
-; RV32-NEXT:    vmv2r.v v12, v8
-; RV32-NEXT:    vrgather.vv v8, v12, v20
-; RV32-NEXT:    vid.v v12
-; RV32-NEXT:    vrsub.vi v12, v12, 15
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT:    vmv.s.x v0, a0
-; RV32-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; RV32-NEXT:    vrgather.vv v8, v16, v12, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: v16i16_2:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI15_0)
-; RV64-NEXT:    addi a0, a0, %lo(.LCPI15_0)
-; RV64-NEXT:    li a1, 32
-; RV64-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; RV64-NEXT:    vle16.v v20, (a0)
-; RV64-NEXT:    vmv2r.v v16, v10
-; RV64-NEXT:    vmv2r.v v12, v8
-; RV64-NEXT:    vrgather.vv v8, v12, v20
-; RV64-NEXT:    vid.v v12
-; RV64-NEXT:    vrsub.vi v12, v12, 15
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT:    vmv.s.x v0, a0
-; RV64-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; RV64-NEXT:    vrgather.vv v8, v16, v12, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: v16i16_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI15_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI15_0)
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vle16.v v20, (a0)
+; CHECK-NEXT:    vmv2r.v v16, v10
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vrgather.vv v8, v12, v20
+; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    vrsub.vi v12, v12, 15
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT:    vrgather.vv v8, v16, v12, v0.t
+; CHECK-NEXT:    ret
   %v32i16 = shufflevector <16 x i16> %a, <16 x i16> %b,  <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <32 x i16> %v32i16
 }
@@ -401,47 +361,26 @@ define <16 x i32> @v16i32(<16 x i32> %a) {
 }
 
 define <32 x i32> @v16i32_2(<16 x i32> %a, <16 x i32> %b) {
-; RV32-LABEL: v16i32_2:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI23_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI23_0)
-; RV32-NEXT:    li a1, 32
-; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; RV32-NEXT:    vle16.v v20, (a0)
-; RV32-NEXT:    vmv4r.v v24, v12
-; RV32-NEXT:    vmv4r.v v16, v8
-; RV32-NEXT:    vrgatherei16.vv v8, v16, v20
-; RV32-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; RV32-NEXT:    vid.v v16
-; RV32-NEXT:    vrsub.vi v16, v16, 15
-; RV32-NEXT:    lui a0, 16
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT:    vmv.s.x v0, a0
-; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; RV32-NEXT:    vrgatherei16.vv v8, v24, v16, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: v16i32_2:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI23_0)
-; RV64-NEXT:    addi a0, a0, %lo(.LCPI23_0)
-; RV64-NEXT:    li a1, 32
-; RV64-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; RV64-NEXT:    vle16.v v20, (a0)
-; RV64-NEXT:    vmv4r.v v24, v12
-; RV64-NEXT:    vmv4r.v v16, v8
-; RV64-NEXT:    vrgatherei16.vv v8, v16, v20
-; RV64-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; RV64-NEXT:    vid.v v16
-; RV64-NEXT:    vrsub.vi v16, v16, 15
-; RV64-NEXT:    lui a0, 16
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT:    vmv.s.x v0, a0
-; RV64-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; RV64-NEXT:    vrgatherei16.vv v8, v24, v16, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: v16i32_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI23_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI23_0)
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vle16.v v20, (a0)
+; CHECK-NEXT:    vmv4r.v v24, v12
+; CHECK-NEXT:    vmv4r.v v16, v8
+; CHECK-NEXT:    vrgatherei16.vv v8, v16, v20
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vrsub.vi v16, v16, 15
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; CHECK-NEXT:    vrgatherei16.vv v8, v24, v16, v0.t
+; CHECK-NEXT:    ret
   %v32i32 = shufflevector <16 x i32> %a, <16 x i32> %b,  <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <32 x i32> %v32i32
 }
@@ -793,3 +732,6 @@ define <32 x i8> @v32i8(<32 x i8> %a) {
   ret <32 x i8> %v32i8
 }
 
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index 63a85b1f4dc74e4..f08bfce409305c8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -251,7 +251,7 @@ define void @sink_splat_mul_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB7_5
 ; CHECK-NEXT:  .LBB7_2: # %vector.ph
-; CHECK-NEXT:    addiw a2, a3, -1
+; CHECK-NEXT:    addi a2, a3, -1
 ; CHECK-NEXT:    andi a4, a2, 1024
 ; CHECK-NEXT:    xori a2, a4, 1024
 ; CHECK-NEXT:    slli a5, a5, 1
@@ -342,7 +342,7 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB8_5
 ; CHECK-NEXT:  .LBB8_2: # %vector.ph
-; CHECK-NEXT:    addiw a2, a3, -1
+; CHECK-NEXT:    addi a2, a3, -1
 ; CHECK-NEXT:    andi a4, a2, 1024
 ; CHECK-NEXT:    xori a2, a4, 1024
 ; CHECK-NEXT:    slli a5, a5, 1
@@ -433,7 +433,7 @@ define void @sink_splat_sub_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB9_5
 ; CHECK-NEXT:  .LBB9_2: # %vector.ph
-; CHECK-NEXT:    addiw a2, a3, -1
+; CHECK-NEXT:    addi a2, a3, -1
 ; CHECK-NEXT:    andi a4, a2, 1024
 ; CHECK-NEXT:    xori a2, a4, 1024
 ; CHECK-NEXT:    slli a5, a5, 1
@@ -524,7 +524,7 @@ define void @sink_splat_rsub_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB10_5
 ; CHECK-NEXT:  .LBB10_2: # %vector.ph
-; CHECK-NEXT:    addiw a2, a3, -1
+; CHECK-NEXT:    addi a2, a3, -1
 ; CHECK-NEXT:    andi a4, a2, 1024
 ; CHECK-NEXT:    xori a2, a4, 1024
 ; CHECK-NEXT:    slli a5, a5, 1
@@ -615,7 +615,7 @@ define void @sink_splat_and_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB11_5
 ; CHECK-NEXT:  .LBB11_2: # %vector.ph
-; CHECK-NEXT:    addiw a2, a3, -1
+; CHECK-NEXT:    addi a2, a3, -1
 ; CHECK-NEXT:    andi a4, a2, 1024
 ; CHECK-NEXT:    xori a2, a4, 1024
 ; CHECK-NEXT:    slli a5, a5, 1
@@ -706,7 +706,7 @@ define void @sink_splat_or_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB12_5
 ; CHECK-NEXT:  .LBB12_2: # %vector.ph
-; CHECK-NEXT:    addiw a2, a3, -1
+; CHECK-NEXT:    addi a2, a3, -1
 ; CHECK-NEXT:    andi a4, a2, 1024
 ; CHECK-NEXT:    xori a2, a4, 1024
 ; CHECK-NEXT:    slli a5, a5, 1
@@ -797,7 +797,7 @@ define void @sink_splat_xor_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB13_5
 ; CHECK-NEXT:  .LBB13_2: # %vector.ph
-; CHECK-NEXT:    addiw a2, a3, -1
+; CHECK-NEXT:    addi a2, a3, -1
 ; CHECK-NEXT:    andi a4, a2, 1024
 ; CHECK-NEXT:    xori a2, a4, 1024
 ; CHECK-NEXT:    slli a5, a5, 1
@@ -990,7 +990,7 @@ define void @sink_splat_shl_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB17_5
 ; CHECK-NEXT:  .LBB17_2: # %vector.ph
-; CHECK-NEXT:    addiw a2, a3, -1
+; CHECK-NEXT:    addi a2, a3, -1
 ; CHECK-NEXT:    andi a4, a2, 1024
 ; CHECK-NEXT:    xori a2, a4, 1024
 ; CHECK-NEXT:    slli a5, a5, 1
@@ -1081,7 +1081,7 @@ define void @sink_splat_lshr_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB18_5
 ; CHECK-NEXT:  .LBB18_2: # %vector.ph
-; CHECK-NEXT:    addiw a2, a3, -1
+; CHECK-NEXT:    addi a2, a3, -1
 ; CHECK-NEXT:    andi a4, a2, 1024
 ; CHECK-NEXT:    xori a2, a4, 1024
 ; CHECK-NEXT:    slli a5, a5, 1
@@ -1172,7 +1172,7 @@ define void @sink_splat_ashr_scalable(ptr nocapture %a) {
 ; CHECK-NEXT:    li a1, 0
 ; CHECK-NEXT:    j .LBB19_5
 ; CHECK-NEXT:  .LBB19_2: # %vector.ph
-; CHECK-NEXT:    addiw a1, a2, -1
+; CHECK-NEXT:    addi a1, a2, -1
 ; CHECK-NEXT:    andi a3, a1, 1024
 ; CHECK-NEXT:    xori a1, a3, 1024
 ; CHECK-NEXT:    slli a4, a4, 1
@@ -1467,7 +1467,7 @@ define void @sink_splat_fmul_scalable(ptr nocapture %a, float %x) {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB26_5
 ; CHECK-NEXT:  .LBB26_2: # %vector.ph
-; CHECK-NEXT:    addiw a2, a3, -1
+; CHECK-NEXT:    addi a2, a3, -1
 ; CHECK-NEXT:    andi a4, a2, 1024
 ; CHECK-NEXT:    xori a2, a4, 1024
 ; CHECK-NEXT:    vsetvli a5, zero, e32, m1, ta, ma
@@ -1557,7 +1557,7 @@ define void @sink_splat_fdiv_scalable(ptr nocapture %a, float %x) {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB27_5
 ; CHECK-NEXT:  .LBB27_2: # %vector.ph
-; CHECK-NEXT:    addiw a2, a3, -1
+; CHECK-NEXT:    addi a2, a3, -1
 ; CHECK-NEXT:    andi a4, a2, 1024
 ; CHECK-NEXT:    xori a2, a4, 1024
 ; CHECK-NEXT:    vsetvli a5, zero, e32, m1, ta, ma
@@ -1647,7 +1647,7 @@ define void @sink_splat_frdiv_scalable(ptr nocapture %a, float %x) {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB28_5
 ; CHECK-NEXT:  .LBB28_2: # %vector.ph
-; CHECK-NEXT:    addiw a2, a3, -1
+; CHECK-NEXT:    addi a2, a3, -1
 ; CHECK-NEXT:    andi a4, a2, 1024
 ; CHECK-NEXT:    xori a2, a4, 1024
 ; CHECK-NEXT:    vsetvli a5, zero, e32, m1, ta, ma
@@ -1737,7 +1737,7 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB29_5
 ; CHECK-NEXT:  .LBB29_2: # %vector.ph
-; CHECK-NEXT:    addiw a2, a3, -1
+; CHECK-NEXT:    addi a2, a3, -1
 ; CHECK-NEXT:    andi a4, a2, 1024
 ; CHECK-NEXT:    xori a2, a4, 1024
 ; CHECK-NEXT:    vsetvli a5, zero, e32, m1, ta, ma
@@ -1827,7 +1827,7 @@ define void @sink_splat_fsub_scalable(ptr nocapture %a, float %x) {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB30_5
 ; CHECK-NEXT:  .LBB30_2: # %vector.ph
-; CHECK-NEXT:    addiw a2, a3, -1
+; CHECK-NEXT:    addi a2, a3, -1
 ; CHECK-NEXT:    andi a4, a2, 1024
 ; CHECK-NEXT:    xori a2, a4, 1024
 ; CHECK-NEXT:    vsetvli a5, zero, e32, m1, ta, ma
@@ -1917,7 +1917,7 @@ define void @sink_splat_frsub_scalable(ptr nocapture %a, float %x) {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB31_5
 ; CHECK-NEXT:  .LBB31_2: # %vector.ph
-; CHECK-NEXT:    addiw a2, a3, -1
+; CHECK-NEXT:    addi a2, a3, -1
 ; CHECK-NEXT:    andi a4, a2, 1024
 ; CHECK-NEXT:    xori a2, a4, 1024
 ; CHECK-NEXT:    vsetvli a5, zero, e32, m1, ta, ma
@@ -2083,7 +2083,7 @@ define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocap
 ; CHECK-NEXT:    li a4, 0
 ; CHECK-NEXT:    j .LBB34_5
 ; CHECK-NEXT:  .LBB34_2: # %vector.ph
-; CHECK-NEXT:    addiw a4, a3, -1
+; CHECK-NEXT:    addi a4, a3, -1
 ; CHECK-NEXT:    andi a5, a4, 1024
 ; CHECK-NEXT:    xori a4, a5, 1024
 ; CHECK-NEXT:    vsetvli a6, zero, e32, m1, ta, ma
@@ -2183,7 +2183,7 @@ define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noali
 ; CHECK-NEXT:    li a4, 0
 ; CHECK-NEXT:    j .LBB35_5
 ; CHECK-NEXT:  .LBB35_2: # %vector.ph
-; CHECK-NEXT:    addiw a4, a3, -1
+; CHECK-NEXT:    addi a4, a3, -1
 ; CHECK-NEXT:    andi a5, a4, 1024
 ; CHECK-NEXT:    xori a4, a5, 1024
 ; CHECK-NEXT:    vsetvli a6, zero, e32, m1, ta, ma
@@ -2496,7 +2496,7 @@ define void @sink_splat_udiv_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB42_5
 ; CHECK-NEXT:  .LBB42_2: # %vector.ph
-; CHECK-NEXT:    addiw a2, a3, -1
+; CHECK-NEXT:    addi a2, a3, -1
 ; CHECK-NEXT:    andi a4, a2, 1024
 ; CHECK-NEXT:    xori a2, a4, 1024
 ; CHECK-NEXT:    slli a5, a5, 1
@@ -2587,7 +2587,7 @@ define void @sink_splat_sdiv_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB43_5
 ; CHECK-NEXT:  .LBB43_2: # %vector.ph
-; CHECK-NEXT:    addiw a2, a3, -1
+; CHECK-NEXT:    addi a2, a3, -1
 ; CHECK-NEXT:    andi a4, a2, 1024
 ; CHECK-NEXT:    xori a2, a4, 1024
 ; CHECK-NEXT:    slli a5, a5, 1
@@ -2678,7 +2678,7 @@ define void @sink_splat_urem_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB44_5
 ; CHECK-NEXT:  .LBB44_2: # %vector.ph
-; CHECK-NEXT:    addiw a2, a3, -1
+; CHECK-NEXT:    addi a2, a3, -1
 ; CHECK-NEXT:    andi a4, a2, 1024
 ; CHECK-NEXT:    xori a2, a4, 1024
 ; CHECK-NEXT:    slli a5, a5, 1
@@ -2769,7 +2769,7 @@ define void @sink_splat_srem_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB45_5
 ; CHECK-NEXT:  .LBB45_2: # %vector.ph
-; CHECK-NEXT:    addiw a2, a3, -1
+; CHECK-NEXT:    addi a2, a3, -1
 ; CHECK-NEXT:    andi a4, a2, 1024
 ; CHECK-NEXT:    xori a2, a4, 1024
 ; CHECK-NEXT:    slli a5, a5, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll
index 56d98981947c3c0..b7fe722958bfb89 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll
@@ -32,7 +32,7 @@ define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vmsle.vi v0, v8, -1
 ; CHECK-NEXT:    lui a0, 524288
-; CHECK-NEXT:    addiw a1, a0, -1
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsll.vv v10, v8, v9
 ; CHECK-NEXT:    vsra.vv v9, v10, v9
 ; CHECK-NEXT:    vmsne.vv v8, v8, v9
@@ -51,7 +51,7 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vmsle.vi v0, v8, -1
 ; CHECK-NEXT:    lui a0, 8
-; CHECK-NEXT:    addiw a1, a0, -1
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsll.vv v10, v8, v9
 ; CHECK-NEXT:    vsra.vv v9, v10, v9
 ; CHECK-NEXT:    vmsne.vv v8, v8, v9
@@ -114,7 +114,7 @@ define <vscale x 4 x i32> @vec_nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32>
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmsle.vi v0, v8, -1
 ; CHECK-NEXT:    lui a0, 524288
-; CHECK-NEXT:    addiw a1, a0, -1
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsll.vv v12, v8, v10
 ; CHECK-NEXT:    vsra.vv v14, v12, v10
 ; CHECK-NEXT:    vmsne.vv v10, v8, v14
@@ -133,7 +133,7 @@ define <vscale x 8 x i16> @vec_nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16>
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmsle.vi v0, v8, -1
 ; CHECK-NEXT:    lui a0, 8
-; CHECK-NEXT:    addiw a1, a0, -1
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsll.vv v12, v8, v10
 ; CHECK-NEXT:    vsra.vv v14, v12, v10
 ; CHECK-NEXT:    vmsne.vv v10, v8, v14
diff --git a/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll b/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll
index 839538039c70b97..bfbbb4b4067f841 100644
--- a/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll
@@ -22,14 +22,14 @@ define <vscale x 1 x i16> @test_urem_vec_even_divisor_eq0(<vscale x 1 x i16> %x)
 ; RV64-LABEL: test_urem_vec_even_divisor_eq0:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    lui a0, 1048571
-; RV64-NEXT:    addiw a0, a0, -1365
+; RV64-NEXT:    addi a0, a0, -1365
 ; RV64-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsll.vi v9, v8, 15
 ; RV64-NEXT:    vsrl.vi v8, v8, 1
 ; RV64-NEXT:    vor.vv v8, v8, v9
 ; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, -1366
+; RV64-NEXT:    addi a0, a0, -1366
 ; RV64-NEXT:    vmsgtu.vx v0, v8, a0
 ; RV64-NEXT:    vmv.v.i v8, 0
 ; RV64-NEXT:    vmerge.vim v8, v8, -1, v0
@@ -61,11 +61,11 @@ define <vscale x 1 x i16> @test_urem_vec_odd_divisor_eq0(<vscale x 1 x i16> %x)
 ; RV64-LABEL: test_urem_vec_odd_divisor_eq0:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    lui a0, 1048573
-; RV64-NEXT:    addiw a0, a0, -819
+; RV64-NEXT:    addi a0, a0, -819
 ; RV64-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 819
+; RV64-NEXT:    addi a0, a0, 819
 ; RV64-NEXT:    vmsgtu.vx v0, v8, a0
 ; RV64-NEXT:    vmv.v.i v8, 0
 ; RV64-NEXT:    vmerge.vim v8, v8, -1, v0
@@ -105,13 +105,13 @@ define <vscale x 1 x i16> @test_urem_vec_even_divisor_eq1(<vscale x 1 x i16> %x)
 ; RV64-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; RV64-NEXT:    vsub.vx v8, v8, a0
 ; RV64-NEXT:    lui a0, 1048571
-; RV64-NEXT:    addiw a0, a0, -1365
+; RV64-NEXT:    addi a0, a0, -1365
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    vsll.vi v9, v8, 15
 ; RV64-NEXT:    vsrl.vi v8, v8, 1
 ; RV64-NEXT:    vor.vv v8, v8, v9
 ; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, -1366
+; RV64-NEXT:    addi a0, a0, -1366
 ; RV64-NEXT:    vmsgtu.vx v0, v8, a0
 ; RV64-NEXT:    vmv.v.i v8, 0
 ; RV64-NEXT:    vmerge.vim v8, v8, -1, v0
@@ -148,10 +148,10 @@ define <vscale x 1 x i16> @test_urem_vec_odd_divisor_eq1(<vscale x 1 x i16> %x)
 ; RV64-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; RV64-NEXT:    vsub.vx v8, v8, a0
 ; RV64-NEXT:    lui a0, 1048573
-; RV64-NEXT:    addiw a0, a0, -819
+; RV64-NEXT:    addi a0, a0, -819
 ; RV64-NEXT:    vmul.vx v8, v8, a0
 ; RV64-NEXT:    lui a0, 3
-; RV64-NEXT:    addiw a0, a0, 818
+; RV64-NEXT:    addi a0, a0, 818
 ; RV64-NEXT:    vmsgtu.vx v0, v8, a0
 ; RV64-NEXT:    vmv.v.i v8, 0
 ; RV64-NEXT:    vmerge.vim v8, v8, -1, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
index 5e32e551ba0dd80..0028ac88cc4fed5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
@@ -324,27 +324,16 @@ define <vscale x 1 x i16> @vdiv_vx_nxv1i16(<vscale x 1 x i16> %va, i16 signext %
 }
 
 define <vscale x 1 x i16> @vdiv_vi_nxv1i16_0(<vscale x 1 x i16> %va) {
-; RV32-LABEL: vdiv_vi_nxv1i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 1048571
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vmulh.vx v8, v8, a0
-; RV32-NEXT:    vsra.vi v8, v8, 1
-; RV32-NEXT:    vsrl.vi v9, v8, 15
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vdiv_vi_nxv1i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 1048571
-; RV64-NEXT:    addiw a0, a0, 1755
-; RV64-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vmulh.vx v8, v8, a0
-; RV64-NEXT:    vsra.vi v8, v8, 1
-; RV64-NEXT:    vsrl.vi v9, v8, 15
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: vdiv_vi_nxv1i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 1048571
+; CHECK-NEXT:    addi a0, a0, 1755
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmulh.vx v8, v8, a0
+; CHECK-NEXT:    vsra.vi v8, v8, 1
+; CHECK-NEXT:    vsrl.vi v9, v8, 15
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
   %vc = sdiv <vscale x 1 x i16> %va, %splat
@@ -374,27 +363,16 @@ define <vscale x 2 x i16> @vdiv_vx_nxv2i16(<vscale x 2 x i16> %va, i16 signext %
 }
 
 define <vscale x 2 x i16> @vdiv_vi_nxv2i16_0(<vscale x 2 x i16> %va) {
-; RV32-LABEL: vdiv_vi_nxv2i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 1048571
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vmulh.vx v8, v8, a0
-; RV32-NEXT:    vsra.vi v8, v8, 1
-; RV32-NEXT:    vsrl.vi v9, v8, 15
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vdiv_vi_nxv2i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 1048571
-; RV64-NEXT:    addiw a0, a0, 1755
-; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vmulh.vx v8, v8, a0
-; RV64-NEXT:    vsra.vi v8, v8, 1
-; RV64-NEXT:    vsrl.vi v9, v8, 15
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: vdiv_vi_nxv2i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 1048571
+; CHECK-NEXT:    addi a0, a0, 1755
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmulh.vx v8, v8, a0
+; CHECK-NEXT:    vsra.vi v8, v8, 1
+; CHECK-NEXT:    vsrl.vi v9, v8, 15
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
   %vc = sdiv <vscale x 2 x i16> %va, %splat
@@ -424,27 +402,16 @@ define <vscale x 4 x i16> @vdiv_vx_nxv4i16(<vscale x 4 x i16> %va, i16 signext %
 }
 
 define <vscale x 4 x i16> @vdiv_vi_nxv4i16_0(<vscale x 4 x i16> %va) {
-; RV32-LABEL: vdiv_vi_nxv4i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 1048571
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV32-NEXT:    vmulh.vx v8, v8, a0
-; RV32-NEXT:    vsra.vi v8, v8, 1
-; RV32-NEXT:    vsrl.vi v9, v8, 15
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vdiv_vi_nxv4i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 1048571
-; RV64-NEXT:    addiw a0, a0, 1755
-; RV64-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV64-NEXT:    vmulh.vx v8, v8, a0
-; RV64-NEXT:    vsra.vi v8, v8, 1
-; RV64-NEXT:    vsrl.vi v9, v8, 15
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: vdiv_vi_nxv4i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 1048571
+; CHECK-NEXT:    addi a0, a0, 1755
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmulh.vx v8, v8, a0
+; CHECK-NEXT:    vsra.vi v8, v8, 1
+; CHECK-NEXT:    vsrl.vi v9, v8, 15
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
   %vc = sdiv <vscale x 4 x i16> %va, %splat
@@ -474,27 +441,16 @@ define <vscale x 8 x i16> @vdiv_vx_nxv8i16(<vscale x 8 x i16> %va, i16 signext %
 }
 
 define <vscale x 8 x i16> @vdiv_vi_nxv8i16_0(<vscale x 8 x i16> %va) {
-; RV32-LABEL: vdiv_vi_nxv8i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 1048571
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; RV32-NEXT:    vmulh.vx v8, v8, a0
-; RV32-NEXT:    vsra.vi v8, v8, 1
-; RV32-NEXT:    vsrl.vi v10, v8, 15
-; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vdiv_vi_nxv8i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 1048571
-; RV64-NEXT:    addiw a0, a0, 1755
-; RV64-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; RV64-NEXT:    vmulh.vx v8, v8, a0
-; RV64-NEXT:    vsra.vi v8, v8, 1
-; RV64-NEXT:    vsrl.vi v10, v8, 15
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    ret
+; CHECK-LABEL: vdiv_vi_nxv8i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 1048571
+; CHECK-NEXT:    addi a0, a0, 1755
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmulh.vx v8, v8, a0
+; CHECK-NEXT:    vsra.vi v8, v8, 1
+; CHECK-NEXT:    vsrl.vi v10, v8, 15
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
   %vc = sdiv <vscale x 8 x i16> %va, %splat
@@ -524,27 +480,16 @@ define <vscale x 16 x i16> @vdiv_vx_nxv16i16(<vscale x 16 x i16> %va, i16 signex
 }
 
 define <vscale x 16 x i16> @vdiv_vi_nxv16i16_0(<vscale x 16 x i16> %va) {
-; RV32-LABEL: vdiv_vi_nxv16i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 1048571
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; RV32-NEXT:    vmulh.vx v8, v8, a0
-; RV32-NEXT:    vsra.vi v8, v8, 1
-; RV32-NEXT:    vsrl.vi v12, v8, 15
-; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vdiv_vi_nxv16i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 1048571
-; RV64-NEXT:    addiw a0, a0, 1755
-; RV64-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; RV64-NEXT:    vmulh.vx v8, v8, a0
-; RV64-NEXT:    vsra.vi v8, v8, 1
-; RV64-NEXT:    vsrl.vi v12, v8, 15
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    ret
+; CHECK-LABEL: vdiv_vi_nxv16i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 1048571
+; CHECK-NEXT:    addi a0, a0, 1755
+; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vmulh.vx v8, v8, a0
+; CHECK-NEXT:    vsra.vi v8, v8, 1
+; CHECK-NEXT:    vsrl.vi v12, v8, 15
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
   %vc = sdiv <vscale x 16 x i16> %va, %splat
@@ -574,27 +519,16 @@ define <vscale x 32 x i16> @vdiv_vx_nxv32i16(<vscale x 32 x i16> %va, i16 signex
 }
 
 define <vscale x 32 x i16> @vdiv_vi_nxv32i16_0(<vscale x 32 x i16> %va) {
-; RV32-LABEL: vdiv_vi_nxv32i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 1048571
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; RV32-NEXT:    vmulh.vx v8, v8, a0
-; RV32-NEXT:    vsra.vi v8, v8, 1
-; RV32-NEXT:    vsrl.vi v16, v8, 15
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vdiv_vi_nxv32i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 1048571
-; RV64-NEXT:    addiw a0, a0, 1755
-; RV64-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; RV64-NEXT:    vmulh.vx v8, v8, a0
-; RV64-NEXT:    vsra.vi v8, v8, 1
-; RV64-NEXT:    vsrl.vi v16, v8, 15
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    ret
+; CHECK-LABEL: vdiv_vi_nxv32i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 1048571
+; CHECK-NEXT:    addi a0, a0, 1755
+; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vmulh.vx v8, v8, a0
+; CHECK-NEXT:    vsra.vi v8, v8, 1
+; CHECK-NEXT:    vsrl.vi v16, v8, 15
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
   %vc = sdiv <vscale x 32 x i16> %va, %splat
@@ -639,7 +573,7 @@ define <vscale x 1 x i32> @vdiv_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
 ; RV64-LABEL: vdiv_vi_nxv1i32_0:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    lui a0, 449390
-; RV64-NEXT:    addiw a0, a0, -1171
+; RV64-NEXT:    addi a0, a0, -1171
 ; RV64-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
 ; RV64-NEXT:    vmulh.vx v9, v8, a0
 ; RV64-NEXT:    vsub.vv v8, v9, v8
@@ -691,7 +625,7 @@ define <vscale x 2 x i32> @vdiv_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
 ; RV64-LABEL: vdiv_vi_nxv2i32_0:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    lui a0, 449390
-; RV64-NEXT:    addiw a0, a0, -1171
+; RV64-NEXT:    addi a0, a0, -1171
 ; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; RV64-NEXT:    vmulh.vx v9, v8, a0
 ; RV64-NEXT:    vsub.vv v8, v9, v8
@@ -743,7 +677,7 @@ define <vscale x 4 x i32> @vdiv_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
 ; RV64-LABEL: vdiv_vi_nxv4i32_0:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    lui a0, 449390
-; RV64-NEXT:    addiw a0, a0, -1171
+; RV64-NEXT:    addi a0, a0, -1171
 ; RV64-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; RV64-NEXT:    vmulh.vx v10, v8, a0
 ; RV64-NEXT:    vsub.vv v8, v10, v8
@@ -795,7 +729,7 @@ define <vscale x 8 x i32> @vdiv_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
 ; RV64-LABEL: vdiv_vi_nxv8i32_0:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    lui a0, 449390
-; RV64-NEXT:    addiw a0, a0, -1171
+; RV64-NEXT:    addi a0, a0, -1171
 ; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vmulh.vx v12, v8, a0
 ; RV64-NEXT:    vsub.vv v8, v12, v8
@@ -847,7 +781,7 @@ define <vscale x 16 x i32> @vdiv_vi_nxv16i32_0(<vscale x 16 x i32> %va) {
 ; RV64-LABEL: vdiv_vi_nxv16i32_0:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    lui a0, 449390
-; RV64-NEXT:    addiw a0, a0, -1171
+; RV64-NEXT:    addi a0, a0, -1171
 ; RV64-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
 ; RV64-NEXT:    vmulh.vx v16, v8, a0
 ; RV64-NEXT:    vsub.vv v8, v16, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
index 90b8a7fa70b32ef..c505cb3d1bbd4ac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
@@ -303,23 +303,14 @@ define <vscale x 1 x i16> @vdivu_vx_nxv1i16(<vscale x 1 x i16> %va, i16 signext
 }
 
 define <vscale x 1 x i16> @vdivu_vi_nxv1i16_0(<vscale x 1 x i16> %va) {
-; RV32-LABEL: vdivu_vi_nxv1i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 2
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vmulhu.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 13
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vdivu_vi_nxv1i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 2
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 13
-; RV64-NEXT:    ret
+; CHECK-LABEL: vdivu_vi_nxv1i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmulhu.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 13
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
   %vc = udiv <vscale x 1 x i16> %va, %splat
@@ -349,23 +340,14 @@ define <vscale x 2 x i16> @vdivu_vx_nxv2i16(<vscale x 2 x i16> %va, i16 signext
 }
 
 define <vscale x 2 x i16> @vdivu_vi_nxv2i16_0(<vscale x 2 x i16> %va) {
-; RV32-LABEL: vdivu_vi_nxv2i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 2
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vmulhu.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 13
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vdivu_vi_nxv2i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 2
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 13
-; RV64-NEXT:    ret
+; CHECK-LABEL: vdivu_vi_nxv2i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmulhu.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 13
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
   %vc = udiv <vscale x 2 x i16> %va, %splat
@@ -395,23 +377,14 @@ define <vscale x 4 x i16> @vdivu_vx_nxv4i16(<vscale x 4 x i16> %va, i16 signext
 }
 
 define <vscale x 4 x i16> @vdivu_vi_nxv4i16_0(<vscale x 4 x i16> %va) {
-; RV32-LABEL: vdivu_vi_nxv4i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 2
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV32-NEXT:    vmulhu.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 13
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vdivu_vi_nxv4i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 2
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 13
-; RV64-NEXT:    ret
+; CHECK-LABEL: vdivu_vi_nxv4i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmulhu.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 13
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
   %vc = udiv <vscale x 4 x i16> %va, %splat
@@ -441,23 +414,14 @@ define <vscale x 8 x i16> @vdivu_vx_nxv8i16(<vscale x 8 x i16> %va, i16 signext
 }
 
 define <vscale x 8 x i16> @vdivu_vi_nxv8i16_0(<vscale x 8 x i16> %va) {
-; RV32-LABEL: vdivu_vi_nxv8i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 2
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; RV32-NEXT:    vmulhu.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 13
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vdivu_vi_nxv8i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 2
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 13
-; RV64-NEXT:    ret
+; CHECK-LABEL: vdivu_vi_nxv8i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmulhu.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 13
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
   %vc = udiv <vscale x 8 x i16> %va, %splat
@@ -487,23 +451,14 @@ define <vscale x 16 x i16> @vdivu_vx_nxv16i16(<vscale x 16 x i16> %va, i16 signe
 }
 
 define <vscale x 16 x i16> @vdivu_vi_nxv16i16_0(<vscale x 16 x i16> %va) {
-; RV32-LABEL: vdivu_vi_nxv16i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 2
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; RV32-NEXT:    vmulhu.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 13
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vdivu_vi_nxv16i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 2
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 13
-; RV64-NEXT:    ret
+; CHECK-LABEL: vdivu_vi_nxv16i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vmulhu.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 13
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
   %vc = udiv <vscale x 16 x i16> %va, %splat
@@ -533,23 +488,14 @@ define <vscale x 32 x i16> @vdivu_vx_nxv32i16(<vscale x 32 x i16> %va, i16 signe
 }
 
 define <vscale x 32 x i16> @vdivu_vi_nxv32i16_0(<vscale x 32 x i16> %va) {
-; RV32-LABEL: vdivu_vi_nxv32i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 2
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; RV32-NEXT:    vmulhu.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 13
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vdivu_vi_nxv32i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 2
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 13
-; RV64-NEXT:    ret
+; CHECK-LABEL: vdivu_vi_nxv32i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vmulhu.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 13
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
   %vc = udiv <vscale x 32 x i16> %va, %splat
@@ -579,23 +525,14 @@ define <vscale x 1 x i32> @vdivu_vx_nxv1i32(<vscale x 1 x i32> %va, i32 signext
 }
 
 define <vscale x 1 x i32> @vdivu_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
-; RV32-LABEL: vdivu_vi_nxv1i32_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vmulhu.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 29
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vdivu_vi_nxv1i32_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 131072
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 29
-; RV64-NEXT:    ret
+; CHECK-LABEL: vdivu_vi_nxv1i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vmulhu.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 29
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i32> poison, i32 -7, i32 0
   %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
   %vc = udiv <vscale x 1 x i32> %va, %splat
@@ -625,23 +562,14 @@ define <vscale x 2 x i32> @vdivu_vx_nxv2i32(<vscale x 2 x i32> %va, i32 signext
 }
 
 define <vscale x 2 x i32> @vdivu_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
-; RV32-LABEL: vdivu_vi_nxv2i32_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmulhu.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 29
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vdivu_vi_nxv2i32_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 131072
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 29
-; RV64-NEXT:    ret
+; CHECK-LABEL: vdivu_vi_nxv2i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vmulhu.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 29
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> poison, i32 -7, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
   %vc = udiv <vscale x 2 x i32> %va, %splat
@@ -671,23 +599,14 @@ define <vscale x 4 x i32> @vdivu_vx_nxv4i32(<vscale x 4 x i32> %va, i32 signext
 }
 
 define <vscale x 4 x i32> @vdivu_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
-; RV32-LABEL: vdivu_vi_nxv4i32_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmulhu.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 29
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vdivu_vi_nxv4i32_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 131072
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 29
-; RV64-NEXT:    ret
+; CHECK-LABEL: vdivu_vi_nxv4i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vmulhu.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 29
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> poison, i32 -7, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
   %vc = udiv <vscale x 4 x i32> %va, %splat
@@ -717,23 +636,14 @@ define <vscale x 8 x i32> @vdivu_vx_nxv8i32(<vscale x 8 x i32> %va, i32 signext
 }
 
 define <vscale x 8 x i32> @vdivu_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
-; RV32-LABEL: vdivu_vi_nxv8i32_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmulhu.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 29
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vdivu_vi_nxv8i32_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 131072
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 29
-; RV64-NEXT:    ret
+; CHECK-LABEL: vdivu_vi_nxv8i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmulhu.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 29
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> poison, i32 -7, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
   %vc = udiv <vscale x 8 x i32> %va, %splat
@@ -763,23 +673,14 @@ define <vscale x 16 x i32> @vdivu_vx_nxv16i32(<vscale x 16 x i32> %va, i32 signe
 }
 
 define <vscale x 16 x i32> @vdivu_vi_nxv16i32_0(<vscale x 16 x i32> %va) {
-; RV32-LABEL: vdivu_vi_nxv16i32_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmulhu.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 29
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vdivu_vi_nxv16i32_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 131072
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 29
-; RV64-NEXT:    ret
+; CHECK-LABEL: vdivu_vi_nxv16i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vmulhu.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 29
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i32> poison, i32 -7, i32 0
   %splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
   %vc = udiv <vscale x 16 x i32> %va, %splat
@@ -1231,29 +1132,17 @@ define <vscale x 8 x i32> @vdivu_vx_mask_nxv8i32(<vscale x 8 x i32> %va, i32 sig
 }
 
 define <vscale x 8 x i32> @vdivu_vi_mask_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %mask) {
-; RV32-LABEL: vdivu_vi_mask_nxv8i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 149797
-; RV32-NEXT:    addi a0, a0, -1755
-; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
-; RV32-NEXT:    vmulhu.vx v12, v8, a0
-; RV32-NEXT:    vsub.vv v16, v8, v12
-; RV32-NEXT:    vsrl.vi v16, v16, 1
-; RV32-NEXT:    vadd.vv v12, v16, v12
-; RV32-NEXT:    vsrl.vi v8, v12, 2, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vdivu_vi_mask_nxv8i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 149797
-; RV64-NEXT:    addiw a0, a0, -1755
-; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
-; RV64-NEXT:    vmulhu.vx v12, v8, a0
-; RV64-NEXT:    vsub.vv v16, v8, v12
-; RV64-NEXT:    vsrl.vi v16, v16, 1
-; RV64-NEXT:    vadd.vv v12, v16, v12
-; RV64-NEXT:    vsrl.vi v8, v12, 2, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: vdivu_vi_mask_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 149797
+; CHECK-NEXT:    addi a0, a0, -1755
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vmulhu.vx v12, v8, a0
+; CHECK-NEXT:    vsub.vv v16, v8, v12
+; CHECK-NEXT:    vsrl.vi v16, v16, 1
+; CHECK-NEXT:    vadd.vv v12, v16, v12
+; CHECK-NEXT:    vsrl.vi v8, v12, 2, v0.t
+; CHECK-NEXT:    ret
   %head1 = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
   %one = shufflevector <vscale x 8 x i32> %head1, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
   %head2 = insertelement <vscale x 8 x i32> poison, i32 7, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
index ff236d7def7d6e1..b8f2afd194e46d6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
@@ -89,35 +89,35 @@ define <8 x i32> @vector_interleave_v8i32_v4i32(<4 x i32> %a, <4 x i32> %b) {
 }
 
 define <4 x i64> @vector_interleave_v4i64_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; RV32-LABEL: vector_interleave_v4i64_v2i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v10, v9
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vslideup.vi v8, v10, 2
-; RV32-NEXT:    lui a0, 12304
-; RV32-NEXT:    addi a0, a0, 512
-; RV32-NEXT:    vmv.s.x v10, a0
-; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vsext.vf2 v12, v10
-; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT:    vrgatherei16.vv v10, v8, v12
-; RV32-NEXT:    vmv.v.v v8, v10
-; RV32-NEXT:    ret
+; CHECK-LABEL: vector_interleave_v4i64_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v10, 2
+; CHECK-NEXT:    lui a0, 12304
+; CHECK-NEXT:    addi a0, a0, 512
+; CHECK-NEXT:    vmv.s.x v10, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v10
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
 ;
-; RV64-LABEL: vector_interleave_v4i64_v2i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vmv1r.v v10, v9
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT:    vslideup.vi v8, v10, 2
-; RV64-NEXT:    lui a0, 12304
-; RV64-NEXT:    addiw a0, a0, 512
-; RV64-NEXT:    vmv.s.x v10, a0
-; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vsext.vf2 v12, v10
-; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT:    vrgatherei16.vv v10, v8, v12
-; RV64-NEXT:    vmv.v.v v8, v10
-; RV64-NEXT:    ret
+; ZVBB-LABEL: vector_interleave_v4i64_v2i64:
+; ZVBB:       # %bb.0:
+; ZVBB-NEXT:    vmv1r.v v10, v9
+; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVBB-NEXT:    vslideup.vi v8, v10, 2
+; ZVBB-NEXT:    lui a0, 12304
+; ZVBB-NEXT:    addi a0, a0, 512
+; ZVBB-NEXT:    vmv.s.x v10, a0
+; ZVBB-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVBB-NEXT:    vsext.vf2 v12, v10
+; ZVBB-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; ZVBB-NEXT:    vrgatherei16.vv v10, v8, v12
+; ZVBB-NEXT:    vmv.v.v v8, v10
+; ZVBB-NEXT:    ret
 	   %res = call <4 x i64> @llvm.experimental.vector.interleave2.v4i64(<2 x i64> %a, <2 x i64> %b)
 	   ret <4 x i64> %res
 }
@@ -237,35 +237,35 @@ define <8 x float> @vector_interleave_v8f32_v4f32(<4 x float> %a, <4 x float> %b
 }
 
 define <4 x double> @vector_interleave_v4f64_v2f64(<2 x double> %a, <2 x double> %b) {
-; RV32-LABEL: vector_interleave_v4f64_v2f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vmv1r.v v10, v9
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vslideup.vi v8, v10, 2
-; RV32-NEXT:    lui a0, 12304
-; RV32-NEXT:    addi a0, a0, 512
-; RV32-NEXT:    vmv.s.x v10, a0
-; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vsext.vf2 v12, v10
-; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT:    vrgatherei16.vv v10, v8, v12
-; RV32-NEXT:    vmv.v.v v8, v10
-; RV32-NEXT:    ret
+; CHECK-LABEL: vector_interleave_v4f64_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v10, 2
+; CHECK-NEXT:    lui a0, 12304
+; CHECK-NEXT:    addi a0, a0, 512
+; CHECK-NEXT:    vmv.s.x v10, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v10
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
 ;
-; RV64-LABEL: vector_interleave_v4f64_v2f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vmv1r.v v10, v9
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT:    vslideup.vi v8, v10, 2
-; RV64-NEXT:    lui a0, 12304
-; RV64-NEXT:    addiw a0, a0, 512
-; RV64-NEXT:    vmv.s.x v10, a0
-; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vsext.vf2 v12, v10
-; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT:    vrgatherei16.vv v10, v8, v12
-; RV64-NEXT:    vmv.v.v v8, v10
-; RV64-NEXT:    ret
+; ZVBB-LABEL: vector_interleave_v4f64_v2f64:
+; ZVBB:       # %bb.0:
+; ZVBB-NEXT:    vmv1r.v v10, v9
+; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVBB-NEXT:    vslideup.vi v8, v10, 2
+; ZVBB-NEXT:    lui a0, 12304
+; ZVBB-NEXT:    addi a0, a0, 512
+; ZVBB-NEXT:    vmv.s.x v10, a0
+; ZVBB-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVBB-NEXT:    vsext.vf2 v12, v10
+; ZVBB-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; ZVBB-NEXT:    vrgatherei16.vv v10, v8, v12
+; ZVBB-NEXT:    vmv.v.v v8, v10
+; ZVBB-NEXT:    ret
 	   %res = call <4 x double> @llvm.experimental.vector.interleave2.v4f64(<2 x double> %a, <2 x double> %b)
 	   ret <4 x double> %res
 }
@@ -277,3 +277,6 @@ declare <4 x float> @llvm.experimental.vector.interleave2.v4f32(<2 x float>, <2
 declare <16 x half> @llvm.experimental.vector.interleave2.v16f16(<8 x half>, <8 x half>)
 declare <8 x float> @llvm.experimental.vector.interleave2.v8f32(<4 x float>, <4 x float>)
 declare <4 x double> @llvm.experimental.vector.interleave2.v4f64(<2 x double>, <2 x double>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
index d407cf43a4fc1be..58874fe8c8fca7a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
@@ -392,31 +392,18 @@ define <vscale x 1 x i16> @vrem_vx_nxv1i16(<vscale x 1 x i16> %va, i16 signext %
 }
 
 define <vscale x 1 x i16> @vrem_vi_nxv1i16_0(<vscale x 1 x i16> %va) {
-; RV32-LABEL: vrem_vi_nxv1i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 1048571
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vmulh.vx v9, v8, a0
-; RV32-NEXT:    vsra.vi v9, v9, 1
-; RV32-NEXT:    vsrl.vi v10, v9, 15
-; RV32-NEXT:    vadd.vv v9, v9, v10
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vrem_vi_nxv1i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 1048571
-; RV64-NEXT:    addiw a0, a0, 1755
-; RV64-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vmulh.vx v9, v8, a0
-; RV64-NEXT:    vsra.vi v9, v9, 1
-; RV64-NEXT:    vsrl.vi v10, v9, 15
-; RV64-NEXT:    vadd.vv v9, v9, v10
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: vrem_vi_nxv1i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 1048571
+; CHECK-NEXT:    addi a0, a0, 1755
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmulh.vx v9, v8, a0
+; CHECK-NEXT:    vsra.vi v9, v9, 1
+; CHECK-NEXT:    vsrl.vi v10, v9, 15
+; CHECK-NEXT:    vadd.vv v9, v9, v10
+; CHECK-NEXT:    li a0, -7
+; CHECK-NEXT:    vnmsac.vx v8, a0, v9
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
   %vc = srem <vscale x 1 x i16> %va, %splat
@@ -459,31 +446,18 @@ define <vscale x 2 x i16> @vrem_vx_nxv2i16(<vscale x 2 x i16> %va, i16 signext %
 }
 
 define <vscale x 2 x i16> @vrem_vi_nxv2i16_0(<vscale x 2 x i16> %va) {
-; RV32-LABEL: vrem_vi_nxv2i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 1048571
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vmulh.vx v9, v8, a0
-; RV32-NEXT:    vsra.vi v9, v9, 1
-; RV32-NEXT:    vsrl.vi v10, v9, 15
-; RV32-NEXT:    vadd.vv v9, v9, v10
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vrem_vi_nxv2i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 1048571
-; RV64-NEXT:    addiw a0, a0, 1755
-; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vmulh.vx v9, v8, a0
-; RV64-NEXT:    vsra.vi v9, v9, 1
-; RV64-NEXT:    vsrl.vi v10, v9, 15
-; RV64-NEXT:    vadd.vv v9, v9, v10
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: vrem_vi_nxv2i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 1048571
+; CHECK-NEXT:    addi a0, a0, 1755
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmulh.vx v9, v8, a0
+; CHECK-NEXT:    vsra.vi v9, v9, 1
+; CHECK-NEXT:    vsrl.vi v10, v9, 15
+; CHECK-NEXT:    vadd.vv v9, v9, v10
+; CHECK-NEXT:    li a0, -7
+; CHECK-NEXT:    vnmsac.vx v8, a0, v9
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
   %vc = srem <vscale x 2 x i16> %va, %splat
@@ -526,31 +500,18 @@ define <vscale x 4 x i16> @vrem_vx_nxv4i16(<vscale x 4 x i16> %va, i16 signext %
 }
 
 define <vscale x 4 x i16> @vrem_vi_nxv4i16_0(<vscale x 4 x i16> %va) {
-; RV32-LABEL: vrem_vi_nxv4i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 1048571
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV32-NEXT:    vmulh.vx v9, v8, a0
-; RV32-NEXT:    vsra.vi v9, v9, 1
-; RV32-NEXT:    vsrl.vi v10, v9, 15
-; RV32-NEXT:    vadd.vv v9, v9, v10
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vrem_vi_nxv4i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 1048571
-; RV64-NEXT:    addiw a0, a0, 1755
-; RV64-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV64-NEXT:    vmulh.vx v9, v8, a0
-; RV64-NEXT:    vsra.vi v9, v9, 1
-; RV64-NEXT:    vsrl.vi v10, v9, 15
-; RV64-NEXT:    vadd.vv v9, v9, v10
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: vrem_vi_nxv4i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 1048571
+; CHECK-NEXT:    addi a0, a0, 1755
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmulh.vx v9, v8, a0
+; CHECK-NEXT:    vsra.vi v9, v9, 1
+; CHECK-NEXT:    vsrl.vi v10, v9, 15
+; CHECK-NEXT:    vadd.vv v9, v9, v10
+; CHECK-NEXT:    li a0, -7
+; CHECK-NEXT:    vnmsac.vx v8, a0, v9
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
   %vc = srem <vscale x 4 x i16> %va, %splat
@@ -593,31 +554,18 @@ define <vscale x 8 x i16> @vrem_vx_nxv8i16(<vscale x 8 x i16> %va, i16 signext %
 }
 
 define <vscale x 8 x i16> @vrem_vi_nxv8i16_0(<vscale x 8 x i16> %va) {
-; RV32-LABEL: vrem_vi_nxv8i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 1048571
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; RV32-NEXT:    vmulh.vx v10, v8, a0
-; RV32-NEXT:    vsra.vi v10, v10, 1
-; RV32-NEXT:    vsrl.vi v12, v10, 15
-; RV32-NEXT:    vadd.vv v10, v10, v12
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v10
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vrem_vi_nxv8i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 1048571
-; RV64-NEXT:    addiw a0, a0, 1755
-; RV64-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; RV64-NEXT:    vmulh.vx v10, v8, a0
-; RV64-NEXT:    vsra.vi v10, v10, 1
-; RV64-NEXT:    vsrl.vi v12, v10, 15
-; RV64-NEXT:    vadd.vv v10, v10, v12
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v10
-; RV64-NEXT:    ret
+; CHECK-LABEL: vrem_vi_nxv8i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 1048571
+; CHECK-NEXT:    addi a0, a0, 1755
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmulh.vx v10, v8, a0
+; CHECK-NEXT:    vsra.vi v10, v10, 1
+; CHECK-NEXT:    vsrl.vi v12, v10, 15
+; CHECK-NEXT:    vadd.vv v10, v10, v12
+; CHECK-NEXT:    li a0, -7
+; CHECK-NEXT:    vnmsac.vx v8, a0, v10
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
   %vc = srem <vscale x 8 x i16> %va, %splat
@@ -660,31 +608,18 @@ define <vscale x 16 x i16> @vrem_vx_nxv16i16(<vscale x 16 x i16> %va, i16 signex
 }
 
 define <vscale x 16 x i16> @vrem_vi_nxv16i16_0(<vscale x 16 x i16> %va) {
-; RV32-LABEL: vrem_vi_nxv16i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 1048571
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; RV32-NEXT:    vmulh.vx v12, v8, a0
-; RV32-NEXT:    vsra.vi v12, v12, 1
-; RV32-NEXT:    vsrl.vi v16, v12, 15
-; RV32-NEXT:    vadd.vv v12, v12, v16
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v12
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vrem_vi_nxv16i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 1048571
-; RV64-NEXT:    addiw a0, a0, 1755
-; RV64-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; RV64-NEXT:    vmulh.vx v12, v8, a0
-; RV64-NEXT:    vsra.vi v12, v12, 1
-; RV64-NEXT:    vsrl.vi v16, v12, 15
-; RV64-NEXT:    vadd.vv v12, v12, v16
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v12
-; RV64-NEXT:    ret
+; CHECK-LABEL: vrem_vi_nxv16i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 1048571
+; CHECK-NEXT:    addi a0, a0, 1755
+; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vmulh.vx v12, v8, a0
+; CHECK-NEXT:    vsra.vi v12, v12, 1
+; CHECK-NEXT:    vsrl.vi v16, v12, 15
+; CHECK-NEXT:    vadd.vv v12, v12, v16
+; CHECK-NEXT:    li a0, -7
+; CHECK-NEXT:    vnmsac.vx v8, a0, v12
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
   %vc = srem <vscale x 16 x i16> %va, %splat
@@ -727,31 +662,18 @@ define <vscale x 32 x i16> @vrem_vx_nxv32i16(<vscale x 32 x i16> %va, i16 signex
 }
 
 define <vscale x 32 x i16> @vrem_vi_nxv32i16_0(<vscale x 32 x i16> %va) {
-; RV32-LABEL: vrem_vi_nxv32i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 1048571
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; RV32-NEXT:    vmulh.vx v16, v8, a0
-; RV32-NEXT:    vsra.vi v16, v16, 1
-; RV32-NEXT:    vsrl.vi v24, v16, 15
-; RV32-NEXT:    vadd.vv v16, v16, v24
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vrem_vi_nxv32i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 1048571
-; RV64-NEXT:    addiw a0, a0, 1755
-; RV64-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; RV64-NEXT:    vmulh.vx v16, v8, a0
-; RV64-NEXT:    vsra.vi v16, v16, 1
-; RV64-NEXT:    vsrl.vi v24, v16, 15
-; RV64-NEXT:    vadd.vv v16, v16, v24
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v16
-; RV64-NEXT:    ret
+; CHECK-LABEL: vrem_vi_nxv32i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 1048571
+; CHECK-NEXT:    addi a0, a0, 1755
+; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vmulh.vx v16, v8, a0
+; CHECK-NEXT:    vsra.vi v16, v16, 1
+; CHECK-NEXT:    vsrl.vi v24, v16, 15
+; CHECK-NEXT:    vadd.vv v16, v16, v24
+; CHECK-NEXT:    li a0, -7
+; CHECK-NEXT:    vnmsac.vx v8, a0, v16
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
   %vc = srem <vscale x 32 x i16> %va, %splat
@@ -798,7 +720,7 @@ define <vscale x 1 x i32> @vrem_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
 ; RV64-LABEL: vrem_vi_nxv1i32_0:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    lui a0, 449390
-; RV64-NEXT:    addiw a0, a0, -1171
+; RV64-NEXT:    addi a0, a0, -1171
 ; RV64-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
 ; RV64-NEXT:    vmulh.vx v9, v8, a0
 ; RV64-NEXT:    vsub.vv v9, v9, v8
@@ -854,7 +776,7 @@ define <vscale x 2 x i32> @vrem_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
 ; RV64-LABEL: vrem_vi_nxv2i32_0:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    lui a0, 449390
-; RV64-NEXT:    addiw a0, a0, -1171
+; RV64-NEXT:    addi a0, a0, -1171
 ; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; RV64-NEXT:    vmulh.vx v9, v8, a0
 ; RV64-NEXT:    vsub.vv v9, v9, v8
@@ -910,7 +832,7 @@ define <vscale x 4 x i32> @vrem_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
 ; RV64-LABEL: vrem_vi_nxv4i32_0:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    lui a0, 449390
-; RV64-NEXT:    addiw a0, a0, -1171
+; RV64-NEXT:    addi a0, a0, -1171
 ; RV64-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; RV64-NEXT:    vmulh.vx v10, v8, a0
 ; RV64-NEXT:    vsub.vv v10, v10, v8
@@ -966,7 +888,7 @@ define <vscale x 8 x i32> @vrem_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
 ; RV64-LABEL: vrem_vi_nxv8i32_0:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    lui a0, 449390
-; RV64-NEXT:    addiw a0, a0, -1171
+; RV64-NEXT:    addi a0, a0, -1171
 ; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vmulh.vx v12, v8, a0
 ; RV64-NEXT:    vsub.vv v12, v12, v8
@@ -1022,7 +944,7 @@ define <vscale x 16 x i32> @vrem_vi_nxv16i32_0(<vscale x 16 x i32> %va) {
 ; RV64-LABEL: vrem_vi_nxv16i32_0:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    lui a0, 449390
-; RV64-NEXT:    addiw a0, a0, -1171
+; RV64-NEXT:    addi a0, a0, -1171
 ; RV64-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
 ; RV64-NEXT:    vmulh.vx v16, v8, a0
 ; RV64-NEXT:    vsub.vv v16, v16, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
index 4f85acb0bd5ee18..428d071cac399ef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
@@ -293,27 +293,16 @@ define <vscale x 1 x i16> @vremu_vx_nxv1i16(<vscale x 1 x i16> %va, i16 signext
 }
 
 define <vscale x 1 x i16> @vremu_vi_nxv1i16_0(<vscale x 1 x i16> %va) {
-; RV32-LABEL: vremu_vi_nxv1i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 2
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vmulhu.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v9, v9, 13
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vremu_vi_nxv1i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 2
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vmulhu.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v9, v9, 13
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: vremu_vi_nxv1i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmulhu.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v9, v9, 13
+; CHECK-NEXT:    li a0, -7
+; CHECK-NEXT:    vnmsac.vx v8, a0, v9
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
   %vc = urem <vscale x 1 x i16> %va, %splat
@@ -343,27 +332,16 @@ define <vscale x 2 x i16> @vremu_vx_nxv2i16(<vscale x 2 x i16> %va, i16 signext
 }
 
 define <vscale x 2 x i16> @vremu_vi_nxv2i16_0(<vscale x 2 x i16> %va) {
-; RV32-LABEL: vremu_vi_nxv2i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 2
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV32-NEXT:    vmulhu.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v9, v9, 13
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vremu_vi_nxv2i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 2
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; RV64-NEXT:    vmulhu.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v9, v9, 13
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: vremu_vi_nxv2i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmulhu.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v9, v9, 13
+; CHECK-NEXT:    li a0, -7
+; CHECK-NEXT:    vnmsac.vx v8, a0, v9
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
   %vc = urem <vscale x 2 x i16> %va, %splat
@@ -393,27 +371,16 @@ define <vscale x 4 x i16> @vremu_vx_nxv4i16(<vscale x 4 x i16> %va, i16 signext
 }
 
 define <vscale x 4 x i16> @vremu_vi_nxv4i16_0(<vscale x 4 x i16> %va) {
-; RV32-LABEL: vremu_vi_nxv4i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 2
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV32-NEXT:    vmulhu.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v9, v9, 13
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vremu_vi_nxv4i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 2
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; RV64-NEXT:    vmulhu.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v9, v9, 13
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: vremu_vi_nxv4i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmulhu.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v9, v9, 13
+; CHECK-NEXT:    li a0, -7
+; CHECK-NEXT:    vnmsac.vx v8, a0, v9
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
   %vc = urem <vscale x 4 x i16> %va, %splat
@@ -443,27 +410,16 @@ define <vscale x 8 x i16> @vremu_vx_nxv8i16(<vscale x 8 x i16> %va, i16 signext
 }
 
 define <vscale x 8 x i16> @vremu_vi_nxv8i16_0(<vscale x 8 x i16> %va) {
-; RV32-LABEL: vremu_vi_nxv8i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 2
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; RV32-NEXT:    vmulhu.vx v10, v8, a0
-; RV32-NEXT:    vsrl.vi v10, v10, 13
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v10
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vremu_vi_nxv8i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 2
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; RV64-NEXT:    vmulhu.vx v10, v8, a0
-; RV64-NEXT:    vsrl.vi v10, v10, 13
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v10
-; RV64-NEXT:    ret
+; CHECK-LABEL: vremu_vi_nxv8i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmulhu.vx v10, v8, a0
+; CHECK-NEXT:    vsrl.vi v10, v10, 13
+; CHECK-NEXT:    li a0, -7
+; CHECK-NEXT:    vnmsac.vx v8, a0, v10
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
   %vc = urem <vscale x 8 x i16> %va, %splat
@@ -493,27 +449,16 @@ define <vscale x 16 x i16> @vremu_vx_nxv16i16(<vscale x 16 x i16> %va, i16 signe
 }
 
 define <vscale x 16 x i16> @vremu_vi_nxv16i16_0(<vscale x 16 x i16> %va) {
-; RV32-LABEL: vremu_vi_nxv16i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 2
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; RV32-NEXT:    vmulhu.vx v12, v8, a0
-; RV32-NEXT:    vsrl.vi v12, v12, 13
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v12
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vremu_vi_nxv16i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 2
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; RV64-NEXT:    vmulhu.vx v12, v8, a0
-; RV64-NEXT:    vsrl.vi v12, v12, 13
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v12
-; RV64-NEXT:    ret
+; CHECK-LABEL: vremu_vi_nxv16i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vmulhu.vx v12, v8, a0
+; CHECK-NEXT:    vsrl.vi v12, v12, 13
+; CHECK-NEXT:    li a0, -7
+; CHECK-NEXT:    vnmsac.vx v8, a0, v12
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
   %vc = urem <vscale x 16 x i16> %va, %splat
@@ -543,27 +488,16 @@ define <vscale x 32 x i16> @vremu_vx_nxv32i16(<vscale x 32 x i16> %va, i16 signe
 }
 
 define <vscale x 32 x i16> @vremu_vi_nxv32i16_0(<vscale x 32 x i16> %va) {
-; RV32-LABEL: vremu_vi_nxv32i16_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 2
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; RV32-NEXT:    vmulhu.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v16, v16, 13
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vremu_vi_nxv32i16_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 2
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; RV64-NEXT:    vmulhu.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v16, v16, 13
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v16
-; RV64-NEXT:    ret
+; CHECK-LABEL: vremu_vi_nxv32i16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vmulhu.vx v16, v8, a0
+; CHECK-NEXT:    vsrl.vi v16, v16, 13
+; CHECK-NEXT:    li a0, -7
+; CHECK-NEXT:    vnmsac.vx v8, a0, v16
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
   %vc = urem <vscale x 32 x i16> %va, %splat
@@ -593,27 +527,16 @@ define <vscale x 1 x i32> @vremu_vx_nxv1i32(<vscale x 1 x i32> %va, i32 signext
 }
 
 define <vscale x 1 x i32> @vremu_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
-; RV32-LABEL: vremu_vi_nxv1i32_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vmulhu.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v9, v9, 29
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vremu_vi_nxv1i32_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 131072
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vmulhu.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v9, v9, 29
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: vremu_vi_nxv1i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vmulhu.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v9, v9, 29
+; CHECK-NEXT:    li a0, -7
+; CHECK-NEXT:    vnmsac.vx v8, a0, v9
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i32> poison, i32 -7, i32 0
   %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
   %vc = urem <vscale x 1 x i32> %va, %splat
@@ -643,27 +566,16 @@ define <vscale x 2 x i32> @vremu_vx_nxv2i32(<vscale x 2 x i32> %va, i32 signext
 }
 
 define <vscale x 2 x i32> @vremu_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
-; RV32-LABEL: vremu_vi_nxv2i32_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmulhu.vx v9, v8, a0
-; RV32-NEXT:    vsrl.vi v9, v9, 29
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v9
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vremu_vi_nxv2i32_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 131072
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT:    vmulhu.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v9, v9, 29
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v9
-; RV64-NEXT:    ret
+; CHECK-LABEL: vremu_vi_nxv2i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vmulhu.vx v9, v8, a0
+; CHECK-NEXT:    vsrl.vi v9, v9, 29
+; CHECK-NEXT:    li a0, -7
+; CHECK-NEXT:    vnmsac.vx v8, a0, v9
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> poison, i32 -7, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
   %vc = urem <vscale x 2 x i32> %va, %splat
@@ -693,27 +605,16 @@ define <vscale x 4 x i32> @vremu_vx_nxv4i32(<vscale x 4 x i32> %va, i32 signext
 }
 
 define <vscale x 4 x i32> @vremu_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
-; RV32-LABEL: vremu_vi_nxv4i32_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmulhu.vx v10, v8, a0
-; RV32-NEXT:    vsrl.vi v10, v10, 29
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v10
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vremu_vi_nxv4i32_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 131072
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT:    vmulhu.vx v10, v8, a0
-; RV64-NEXT:    vsrl.vi v10, v10, 29
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v10
-; RV64-NEXT:    ret
+; CHECK-LABEL: vremu_vi_nxv4i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vmulhu.vx v10, v8, a0
+; CHECK-NEXT:    vsrl.vi v10, v10, 29
+; CHECK-NEXT:    li a0, -7
+; CHECK-NEXT:    vnmsac.vx v8, a0, v10
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> poison, i32 -7, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
   %vc = urem <vscale x 4 x i32> %va, %splat
@@ -743,27 +644,16 @@ define <vscale x 8 x i32> @vremu_vx_nxv8i32(<vscale x 8 x i32> %va, i32 signext
 }
 
 define <vscale x 8 x i32> @vremu_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
-; RV32-LABEL: vremu_vi_nxv8i32_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmulhu.vx v12, v8, a0
-; RV32-NEXT:    vsrl.vi v12, v12, 29
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v12
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vremu_vi_nxv8i32_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 131072
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT:    vmulhu.vx v12, v8, a0
-; RV64-NEXT:    vsrl.vi v12, v12, 29
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v12
-; RV64-NEXT:    ret
+; CHECK-LABEL: vremu_vi_nxv8i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmulhu.vx v12, v8, a0
+; CHECK-NEXT:    vsrl.vi v12, v12, 29
+; CHECK-NEXT:    li a0, -7
+; CHECK-NEXT:    vnmsac.vx v8, a0, v12
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> poison, i32 -7, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
   %vc = urem <vscale x 8 x i32> %va, %splat
@@ -793,27 +683,16 @@ define <vscale x 16 x i32> @vremu_vx_nxv16i32(<vscale x 16 x i32> %va, i32 signe
 }
 
 define <vscale x 16 x i32> @vremu_vi_nxv16i32_0(<vscale x 16 x i32> %va) {
-; RV32-LABEL: vremu_vi_nxv16i32_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    addi a0, a0, 1
-; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmulhu.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v16, v16, 29
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vremu_vi_nxv16i32_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 131072
-; RV64-NEXT:    addiw a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV64-NEXT:    vmulhu.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v16, v16, 29
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v16
-; RV64-NEXT:    ret
+; CHECK-LABEL: vremu_vi_nxv16i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 131072
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vmulhu.vx v16, v8, a0
+; CHECK-NEXT:    vsrl.vi v16, v16, 29
+; CHECK-NEXT:    li a0, -7
+; CHECK-NEXT:    vnmsac.vx v8, a0, v16
+; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i32> poison, i32 -7, i32 0
   %splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
   %vc = urem <vscale x 16 x i32> %va, %splat
diff --git a/llvm/test/CodeGen/RISCV/select-binop-identity.ll b/llvm/test/CodeGen/RISCV/select-binop-identity.ll
index ebf47cdfd2a1d3e..61344bc8979abb7 100644
--- a/llvm/test/CodeGen/RISCV/select-binop-identity.ll
+++ b/llvm/test/CodeGen/RISCV/select-binop-identity.ll
@@ -266,7 +266,7 @@ define signext i32 @add_select_all_zeros_i32(i1 zeroext %c, i32 signext %x, i32
 ;
 ; RV64I-LABEL: add_select_all_zeros_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    addi a0, a0, -1
 ; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    addw a0, a2, a0
 ; RV64I-NEXT:    ret
@@ -366,7 +366,7 @@ define signext i32 @sub_select_all_zeros_i32(i1 zeroext %c, i32 signext %x, i32
 ;
 ; RV64I-LABEL: sub_select_all_zeros_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    addi a0, a0, -1
 ; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    subw a0, a2, a0
 ; RV64I-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/select-constant-xor.ll b/llvm/test/CodeGen/RISCV/select-constant-xor.ll
index 2e26ae78e2dd8e4..670bdb26cfeb904 100644
--- a/llvm/test/CodeGen/RISCV/select-constant-xor.ll
+++ b/llvm/test/CodeGen/RISCV/select-constant-xor.ll
@@ -225,7 +225,7 @@ define i32 @oneusecmp(i32 %a, i32 %b, i32 %d) {
 ; RV64-LABEL: oneusecmp:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    sext.w a3, a0
-; RV64-NEXT:    sraiw a0, a0, 31
+; RV64-NEXT:    srai a0, a0, 31
 ; RV64-NEXT:    xori a0, a0, 127
 ; RV64-NEXT:    bltz a3, .LBB10_2
 ; RV64-NEXT:  # %bb.1:
diff --git a/llvm/test/CodeGen/RISCV/select-to-and-zext.ll b/llvm/test/CodeGen/RISCV/select-to-and-zext.ll
index eacc26c18415da6..2f03ff969205f6a 100644
--- a/llvm/test/CodeGen/RISCV/select-to-and-zext.ll
+++ b/llvm/test/CodeGen/RISCV/select-to-and-zext.ll
@@ -42,7 +42,7 @@ define i32 @from_cmpeq_fail_bad_andmask(i32 %xx, i32 %y) {
 ; RV64I-NEXT:    sext.w a0, a0
 ; RV64I-NEXT:    addi a0, a0, -9
 ; RV64I-NEXT:    snez a0, a0
-; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    addi a0, a0, -1
 ; RV64I-NEXT:    and a0, a1, a0
 ; RV64I-NEXT:    andi a0, a0, 3
 ; RV64I-NEXT:    ret
@@ -142,7 +142,7 @@ define i32 @from_i1_fail_bad_select1(i1 %x, i32 %y) {
 ;
 ; RV64I-LABEL: from_i1_fail_bad_select1:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, -1
+; RV64I-NEXT:    addi a0, a0, -1
 ; RV64I-NEXT:    and a0, a1, a0
 ; RV64I-NEXT:    andi a0, a0, 1
 ; RV64I-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/select.ll b/llvm/test/CodeGen/RISCV/select.ll
index e07f1d6f594350d..d4a6e9e9dbb4678 100644
--- a/llvm/test/CodeGen/RISCV/select.ll
+++ b/llvm/test/CodeGen/RISCV/select.ll
@@ -147,7 +147,7 @@ define i16 @select_xor_3(i16 %A, i8 %cond) {
 ; RV64IM-LABEL: select_xor_3:
 ; RV64IM:       # %bb.0: # %entry
 ; RV64IM-NEXT:    andi a1, a1, 1
-; RV64IM-NEXT:    addiw a1, a1, -1
+; RV64IM-NEXT:    addi a1, a1, -1
 ; RV64IM-NEXT:    andi a1, a1, 43
 ; RV64IM-NEXT:    xor a0, a0, a1
 ; RV64IM-NEXT:    ret
@@ -189,7 +189,7 @@ define i16 @select_xor_3b(i16 %A, i8 %cond) {
 ; RV64IM-LABEL: select_xor_3b:
 ; RV64IM:       # %bb.0: # %entry
 ; RV64IM-NEXT:    andi a1, a1, 1
-; RV64IM-NEXT:    addiw a1, a1, -1
+; RV64IM-NEXT:    addi a1, a1, -1
 ; RV64IM-NEXT:    andi a1, a1, 43
 ; RV64IM-NEXT:    xor a0, a0, a1
 ; RV64IM-NEXT:    ret
@@ -627,7 +627,7 @@ define i32 @select_add_2(i1 zeroext %cond, i32 %a, i32 %b) {
 ;
 ; RV64IM-LABEL: select_add_2:
 ; RV64IM:       # %bb.0: # %entry
-; RV64IM-NEXT:    addiw a0, a0, -1
+; RV64IM-NEXT:    addi a0, a0, -1
 ; RV64IM-NEXT:    and a0, a0, a2
 ; RV64IM-NEXT:    addw a0, a1, a0
 ; RV64IM-NEXT:    ret
@@ -665,7 +665,7 @@ define i32 @select_add_3(i1 zeroext %cond, i32 %a) {
 ;
 ; RV64IM-LABEL: select_add_3:
 ; RV64IM:       # %bb.0: # %entry
-; RV64IM-NEXT:    addiw a0, a0, -1
+; RV64IM-NEXT:    addi a0, a0, -1
 ; RV64IM-NEXT:    andi a0, a0, 42
 ; RV64IM-NEXT:    addw a0, a1, a0
 ; RV64IM-NEXT:    ret
@@ -754,7 +754,7 @@ define i32 @select_sub_2(i1 zeroext %cond, i32 %a, i32 %b) {
 ;
 ; RV64IM-LABEL: select_sub_2:
 ; RV64IM:       # %bb.0: # %entry
-; RV64IM-NEXT:    addiw a0, a0, -1
+; RV64IM-NEXT:    addi a0, a0, -1
 ; RV64IM-NEXT:    and a0, a0, a2
 ; RV64IM-NEXT:    subw a0, a1, a0
 ; RV64IM-NEXT:    ret
@@ -792,7 +792,7 @@ define i32 @select_sub_3(i1 zeroext %cond, i32 %a) {
 ;
 ; RV64IM-LABEL: select_sub_3:
 ; RV64IM:       # %bb.0: # %entry
-; RV64IM-NEXT:    addiw a0, a0, -1
+; RV64IM-NEXT:    addi a0, a0, -1
 ; RV64IM-NEXT:    andi a0, a0, 42
 ; RV64IM-NEXT:    subw a0, a1, a0
 ; RV64IM-NEXT:    ret
@@ -1159,7 +1159,7 @@ define i32 @select_shl_2(i1 zeroext %cond, i32 %a, i32 %b) {
 ;
 ; RV64IM-LABEL: select_shl_2:
 ; RV64IM:       # %bb.0: # %entry
-; RV64IM-NEXT:    addiw a0, a0, -1
+; RV64IM-NEXT:    addi a0, a0, -1
 ; RV64IM-NEXT:    and a0, a0, a2
 ; RV64IM-NEXT:    sllw a0, a1, a0
 ; RV64IM-NEXT:    ret
@@ -1256,7 +1256,7 @@ define i32 @select_ashr_2(i1 zeroext %cond, i32 %a, i32 %b) {
 ;
 ; RV64IM-LABEL: select_ashr_2:
 ; RV64IM:       # %bb.0: # %entry
-; RV64IM-NEXT:    addiw a0, a0, -1
+; RV64IM-NEXT:    addi a0, a0, -1
 ; RV64IM-NEXT:    and a0, a0, a2
 ; RV64IM-NEXT:    sraw a0, a1, a0
 ; RV64IM-NEXT:    ret
@@ -1353,7 +1353,7 @@ define i32 @select_lshr_2(i1 zeroext %cond, i32 %a, i32 %b) {
 ;
 ; RV64IM-LABEL: select_lshr_2:
 ; RV64IM:       # %bb.0: # %entry
-; RV64IM-NEXT:    addiw a0, a0, -1
+; RV64IM-NEXT:    addi a0, a0, -1
 ; RV64IM-NEXT:    and a0, a0, a2
 ; RV64IM-NEXT:    srlw a0, a1, a0
 ; RV64IM-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll
index f9b72ccb26f3829..3e6893731dd0311 100644
--- a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll
+++ b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll
@@ -76,19 +76,12 @@ define i32 @not_pos_sel_same_variable(i32 signext %a) {
 
 ; Compare if positive and select of constants where one constant is zero.
 define i32 @pos_sel_constants(i32 signext %a) {
-; RV32-LABEL: pos_sel_constants:
-; RV32:       # %bb.0:
-; RV32-NEXT:    slti a0, a0, 0
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    andi a0, a0, 5
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: pos_sel_constants:
-; RV64:       # %bb.0:
-; RV64-NEXT:    slti a0, a0, 0
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    andi a0, a0, 5
-; RV64-NEXT:    ret
+; CHECK-LABEL: pos_sel_constants:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slti a0, a0, 0
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    andi a0, a0, 5
+; CHECK-NEXT:    ret
   %tmp.1 = icmp sgt i32 %a, -1
   %retval = select i1 %tmp.1, i32 5, i32 0
   ret i32 %retval
diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll
index f9ffb6603fa0e4b..3babef93499c859 100644
--- a/llvm/test/CodeGen/RISCV/sextw-removal.ll
+++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll
@@ -181,9 +181,9 @@ define void @test5(i32 signext %arg, i32 signext %arg1) nounwind {
 ; RV64I-NEXT:    lui a1, 209715
 ; RV64I-NEXT:    addiw s1, a1, 819
 ; RV64I-NEXT:    lui a1, 61681
-; RV64I-NEXT:    addiw s2, a1, -241
+; RV64I-NEXT:    addi s2, a1, -241
 ; RV64I-NEXT:    lui a1, 4112
-; RV64I-NEXT:    addiw s3, a1, 257
+; RV64I-NEXT:    addi s3, a1, 257
 ; RV64I-NEXT:  .LBB4_1: # %bb2
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    call bar@plt
@@ -1349,7 +1349,7 @@ define signext i32 @sextw_sh2add(i1 zeroext %0, ptr %1, i32 signext %2, i32 sign
 ; NOREMOVAL-LABEL: sextw_sh2add:
 ; NOREMOVAL:       # %bb.0:
 ; NOREMOVAL-NEXT:    sh2add a2, a2, a3
-; NOREMOVAL-NEXT:    sext.w a2, a2
+; NOREMOVAL-NEXT:    mv a2, a2
 ; NOREMOVAL-NEXT:    beqz a0, .LBB22_2
 ; NOREMOVAL-NEXT:  # %bb.1:
 ; NOREMOVAL-NEXT:    sw a2, 0(a1)
diff --git a/llvm/test/CodeGen/RISCV/shl-demanded.ll b/llvm/test/CodeGen/RISCV/shl-demanded.ll
index 4e3c063eff2deab..b0e3ebcd505665c 100644
--- a/llvm/test/CodeGen/RISCV/shl-demanded.ll
+++ b/llvm/test/CodeGen/RISCV/shl-demanded.ll
@@ -239,7 +239,7 @@ define i32 @set_shl_mask(i32 %x, i32 %y) {
 ; RV64I-LABEL: set_shl_mask:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    lui a2, 16
-; RV64I-NEXT:    addiw a3, a2, 1
+; RV64I-NEXT:    addi a3, a2, 1
 ; RV64I-NEXT:    or a0, a0, a3
 ; RV64I-NEXT:    sllw a0, a0, a1
 ; RV64I-NEXT:    and a0, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/shlimm-addimm.ll b/llvm/test/CodeGen/RISCV/shlimm-addimm.ll
index 4a4762a0db146ca..ead71bcbe113c9e 100644
--- a/llvm/test/CodeGen/RISCV/shlimm-addimm.ll
+++ b/llvm/test/CodeGen/RISCV/shlimm-addimm.ll
@@ -78,7 +78,7 @@ define i32 @shl5_add101024_a(i32 %x) {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slli a0, a0, 5
 ; RV64I-NEXT:    lui a1, 25
-; RV64I-NEXT:    addiw a1, a1, -1376
+; RV64I-NEXT:    addi a1, a1, -1376
 ; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    ret
   %tmp0 = shl i32 %x, 5
@@ -99,7 +99,7 @@ define signext i32 @shl5_add101024_b(i32 signext %x) {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slli a0, a0, 5
 ; RV64I-NEXT:    lui a1, 25
-; RV64I-NEXT:    addiw a1, a1, -1376
+; RV64I-NEXT:    addi a1, a1, -1376
 ; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    ret
   %tmp0 = shl i32 %x, 5
@@ -146,7 +146,7 @@ define i32 @shl5_add47968_a(i32 %x) {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slli a0, a0, 5
 ; RV64I-NEXT:    lui a1, 12
-; RV64I-NEXT:    addiw a1, a1, -1184
+; RV64I-NEXT:    addi a1, a1, -1184
 ; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    ret
   %tmp0 = shl i32 %x, 5
@@ -167,7 +167,7 @@ define signext i32 @shl5_add47968_b(i32 signext %x) {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slli a0, a0, 5
 ; RV64I-NEXT:    lui a1, 12
-; RV64I-NEXT:    addiw a1, a1, -1184
+; RV64I-NEXT:    addi a1, a1, -1184
 ; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    ret
   %tmp0 = shl i32 %x, 5
@@ -214,7 +214,7 @@ define i32 @shl5_add47969_a(i32 %x) {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slli a0, a0, 5
 ; RV64I-NEXT:    lui a1, 12
-; RV64I-NEXT:    addiw a1, a1, -1183
+; RV64I-NEXT:    addi a1, a1, -1183
 ; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    ret
   %tmp0 = shl i32 %x, 5
@@ -235,7 +235,7 @@ define signext i32 @shl5_add47969_b(i32 signext %x) {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slli a0, a0, 5
 ; RV64I-NEXT:    lui a1, 12
-; RV64I-NEXT:    addiw a1, a1, -1183
+; RV64I-NEXT:    addi a1, a1, -1183
 ; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    ret
   %tmp0 = shl i32 %x, 5
@@ -282,7 +282,7 @@ define i32 @shl5_sub47968_a(i32 %x) {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slli a0, a0, 5
 ; RV64I-NEXT:    lui a1, 1048564
-; RV64I-NEXT:    addiw a1, a1, 1184
+; RV64I-NEXT:    addi a1, a1, 1184
 ; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    ret
   %tmp0 = shl i32 %x, 5
@@ -303,7 +303,7 @@ define signext i32 @shl5_sub47968_b(i32 signext %x) {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slli a0, a0, 5
 ; RV64I-NEXT:    lui a1, 1048564
-; RV64I-NEXT:    addiw a1, a1, 1184
+; RV64I-NEXT:    addi a1, a1, 1184
 ; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    ret
   %tmp0 = shl i32 %x, 5
@@ -351,7 +351,7 @@ define i32 @shl5_sub47969_a(i32 %x) {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slli a0, a0, 5
 ; RV64I-NEXT:    lui a1, 1048564
-; RV64I-NEXT:    addiw a1, a1, 1183
+; RV64I-NEXT:    addi a1, a1, 1183
 ; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    ret
   %tmp0 = shl i32 %x, 5
@@ -372,7 +372,7 @@ define signext i32 @shl5_sub47969_b(i32 signext %x) {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slli a0, a0, 5
 ; RV64I-NEXT:    lui a1, 1048564
-; RV64I-NEXT:    addiw a1, a1, 1183
+; RV64I-NEXT:    addi a1, a1, 1183
 ; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    ret
   %tmp0 = shl i32 %x, 5
diff --git a/llvm/test/CodeGen/RISCV/signed-truncation-check.ll b/llvm/test/CodeGen/RISCV/signed-truncation-check.ll
index 4eea8643101b159..0860853ae9c0af9 100644
--- a/llvm/test/CodeGen/RISCV/signed-truncation-check.ll
+++ b/llvm/test/CodeGen/RISCV/signed-truncation-check.ll
@@ -265,7 +265,7 @@ define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
 ;
 ; RV64-LABEL: add_ugecmp_i16_i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addiw a0, a0, -128
+; RV64-NEXT:    addi a0, a0, -128
 ; RV64-NEXT:    slli a0, a0, 48
 ; RV64-NEXT:    srli a0, a0, 56
 ; RV64-NEXT:    sltiu a0, a0, 255
@@ -482,7 +482,7 @@ define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind {
 ;
 ; RV64-LABEL: add_ugtcmp_i16_i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addiw a0, a0, -128
+; RV64-NEXT:    addi a0, a0, -128
 ; RV64-NEXT:    slli a0, a0, 48
 ; RV64-NEXT:    srli a0, a0, 56
 ; RV64-NEXT:    sltiu a0, a0, 255
@@ -508,7 +508,7 @@ define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
 ;
 ; RV64I-LABEL: add_ultcmp_i16_i8:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 128
+; RV64I-NEXT:    addi a0, a0, 128
 ; RV64I-NEXT:    slli a0, a0, 48
 ; RV64I-NEXT:    srli a0, a0, 48
 ; RV64I-NEXT:    sltiu a0, a0, 256
@@ -523,7 +523,7 @@ define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
 ;
 ; RV64ZBB-LABEL: add_ultcmp_i16_i8:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    addiw a0, a0, 128
+; RV64ZBB-NEXT:    addi a0, a0, 128
 ; RV64ZBB-NEXT:    zext.h a0, a0
 ; RV64ZBB-NEXT:    sltiu a0, a0, 256
 ; RV64ZBB-NEXT:    ret
@@ -688,7 +688,7 @@ define i1 @add_ulecmp_i16_i8(i16 %x) nounwind {
 ;
 ; RV64I-LABEL: add_ulecmp_i16_i8:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 128
+; RV64I-NEXT:    addi a0, a0, 128
 ; RV64I-NEXT:    slli a0, a0, 48
 ; RV64I-NEXT:    srli a0, a0, 48
 ; RV64I-NEXT:    sltiu a0, a0, 256
@@ -703,7 +703,7 @@ define i1 @add_ulecmp_i16_i8(i16 %x) nounwind {
 ;
 ; RV64ZBB-LABEL: add_ulecmp_i16_i8:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    addiw a0, a0, 128
+; RV64ZBB-NEXT:    addi a0, a0, 128
 ; RV64ZBB-NEXT:    zext.h a0, a0
 ; RV64ZBB-NEXT:    sltiu a0, a0, 256
 ; RV64ZBB-NEXT:    ret
@@ -784,7 +784,7 @@ define i1 @add_ultcmp_bad_i16_i8_cmp(i16 %x, i16 %y) nounwind {
 ; RV64ZBB-LABEL: add_ultcmp_bad_i16_i8_cmp:
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    zext.h a1, a1
-; RV64ZBB-NEXT:    addiw a0, a0, 128
+; RV64ZBB-NEXT:    addi a0, a0, 128
 ; RV64ZBB-NEXT:    zext.h a0, a0
 ; RV64ZBB-NEXT:    sltu a0, a0, a1
 ; RV64ZBB-NEXT:    ret
@@ -805,7 +805,7 @@ define i1 @add_ultcmp_bad_i8_i16(i16 %x) nounwind {
 ;
 ; RV64I-LABEL: add_ultcmp_bad_i8_i16:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 128
+; RV64I-NEXT:    addi a0, a0, 128
 ; RV64I-NEXT:    slli a0, a0, 48
 ; RV64I-NEXT:    srli a0, a0, 48
 ; RV64I-NEXT:    sltiu a0, a0, 128
@@ -820,7 +820,7 @@ define i1 @add_ultcmp_bad_i8_i16(i16 %x) nounwind {
 ;
 ; RV64ZBB-LABEL: add_ultcmp_bad_i8_i16:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    addiw a0, a0, 128
+; RV64ZBB-NEXT:    addi a0, a0, 128
 ; RV64ZBB-NEXT:    zext.h a0, a0
 ; RV64ZBB-NEXT:    sltiu a0, a0, 128
 ; RV64ZBB-NEXT:    ret
@@ -841,7 +841,7 @@ define i1 @add_ultcmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
 ;
 ; RV64I-LABEL: add_ultcmp_bad_i16_i8_c0notpoweroftwo:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 192
+; RV64I-NEXT:    addi a0, a0, 192
 ; RV64I-NEXT:    slli a0, a0, 48
 ; RV64I-NEXT:    srli a0, a0, 48
 ; RV64I-NEXT:    sltiu a0, a0, 256
@@ -856,7 +856,7 @@ define i1 @add_ultcmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
 ;
 ; RV64ZBB-LABEL: add_ultcmp_bad_i16_i8_c0notpoweroftwo:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    addiw a0, a0, 192
+; RV64ZBB-NEXT:    addi a0, a0, 192
 ; RV64ZBB-NEXT:    zext.h a0, a0
 ; RV64ZBB-NEXT:    sltiu a0, a0, 256
 ; RV64ZBB-NEXT:    ret
@@ -877,7 +877,7 @@ define i1 @add_ultcmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind {
 ;
 ; RV64I-LABEL: add_ultcmp_bad_i16_i8_c1notpoweroftwo:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 128
+; RV64I-NEXT:    addi a0, a0, 128
 ; RV64I-NEXT:    slli a0, a0, 48
 ; RV64I-NEXT:    srli a0, a0, 48
 ; RV64I-NEXT:    sltiu a0, a0, 768
@@ -892,7 +892,7 @@ define i1 @add_ultcmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind {
 ;
 ; RV64ZBB-LABEL: add_ultcmp_bad_i16_i8_c1notpoweroftwo:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    addiw a0, a0, 128
+; RV64ZBB-NEXT:    addi a0, a0, 128
 ; RV64ZBB-NEXT:    zext.h a0, a0
 ; RV64ZBB-NEXT:    sltiu a0, a0, 768
 ; RV64ZBB-NEXT:    ret
@@ -913,7 +913,7 @@ define i1 @add_ultcmp_bad_i16_i8_magic(i16 %x) nounwind {
 ;
 ; RV64I-LABEL: add_ultcmp_bad_i16_i8_magic:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 64
+; RV64I-NEXT:    addi a0, a0, 64
 ; RV64I-NEXT:    slli a0, a0, 48
 ; RV64I-NEXT:    srli a0, a0, 48
 ; RV64I-NEXT:    sltiu a0, a0, 256
@@ -928,7 +928,7 @@ define i1 @add_ultcmp_bad_i16_i8_magic(i16 %x) nounwind {
 ;
 ; RV64ZBB-LABEL: add_ultcmp_bad_i16_i8_magic:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    addiw a0, a0, 64
+; RV64ZBB-NEXT:    addi a0, a0, 64
 ; RV64ZBB-NEXT:    zext.h a0, a0
 ; RV64ZBB-NEXT:    sltiu a0, a0, 256
 ; RV64ZBB-NEXT:    ret
@@ -949,7 +949,7 @@ define i1 @add_ultcmp_bad_i16_i4(i16 %x) nounwind {
 ;
 ; RV64I-LABEL: add_ultcmp_bad_i16_i4:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addiw a0, a0, 8
+; RV64I-NEXT:    addi a0, a0, 8
 ; RV64I-NEXT:    slli a0, a0, 48
 ; RV64I-NEXT:    srli a0, a0, 48
 ; RV64I-NEXT:    sltiu a0, a0, 16
@@ -964,7 +964,7 @@ define i1 @add_ultcmp_bad_i16_i4(i16 %x) nounwind {
 ;
 ; RV64ZBB-LABEL: add_ultcmp_bad_i16_i4:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    addiw a0, a0, 8
+; RV64ZBB-NEXT:    addi a0, a0, 8
 ; RV64ZBB-NEXT:    zext.h a0, a0
 ; RV64ZBB-NEXT:    sltiu a0, a0, 16
 ; RV64ZBB-NEXT:    ret
@@ -985,7 +985,7 @@ define i1 @add_ultcmp_bad_i24_i8(i24 %x) nounwind {
 ;
 ; RV64-LABEL: add_ultcmp_bad_i24_i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addiw a0, a0, 128
+; RV64-NEXT:    addi a0, a0, 128
 ; RV64-NEXT:    slli a0, a0, 40
 ; RV64-NEXT:    srli a0, a0, 40
 ; RV64-NEXT:    sltiu a0, a0, 256
diff --git a/llvm/test/CodeGen/RISCV/srem-lkk.ll b/llvm/test/CodeGen/RISCV/srem-lkk.ll
index 24e740fd143d131..0dc887e6b30d14b 100644
--- a/llvm/test/CodeGen/RISCV/srem-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-lkk.ll
@@ -47,8 +47,8 @@ define i32 @fold_srem_positive_odd(i32 %x) nounwind {
 ; RV64IM-NEXT:    mul a1, a1, a2
 ; RV64IM-NEXT:    srli a1, a1, 32
 ; RV64IM-NEXT:    add a1, a1, a0
-; RV64IM-NEXT:    srliw a2, a1, 31
-; RV64IM-NEXT:    sraiw a1, a1, 6
+; RV64IM-NEXT:    srli a2, a1, 31
+; RV64IM-NEXT:    srai a1, a1, 6
 ; RV64IM-NEXT:    add a1, a1, a2
 ; RV64IM-NEXT:    li a2, 95
 ; RV64IM-NEXT:    mul a1, a1, a2
@@ -198,7 +198,7 @@ define i32 @fold_srem_negative_even(i32 %x) nounwind {
 ; RV64IM-NEXT:    srai a1, a1, 40
 ; RV64IM-NEXT:    add a1, a1, a2
 ; RV64IM-NEXT:    lui a2, 1048570
-; RV64IM-NEXT:    addiw a2, a2, 1595
+; RV64IM-NEXT:    addi a2, a2, 1595
 ; RV64IM-NEXT:    mul a1, a1, a2
 ; RV64IM-NEXT:    subw a0, a0, a1
 ; RV64IM-NEXT:    ret
@@ -273,8 +273,8 @@ define i32 @combine_srem_sdiv(i32 %x) nounwind {
 ; RV64IM-NEXT:    mul a1, a1, a2
 ; RV64IM-NEXT:    srli a1, a1, 32
 ; RV64IM-NEXT:    add a1, a1, a0
-; RV64IM-NEXT:    srliw a2, a1, 31
-; RV64IM-NEXT:    sraiw a1, a1, 6
+; RV64IM-NEXT:    srli a2, a1, 31
+; RV64IM-NEXT:    srai a1, a1, 6
 ; RV64IM-NEXT:    add a1, a1, a2
 ; RV64IM-NEXT:    li a2, 95
 ; RV64IM-NEXT:    mul a2, a1, a2
@@ -309,8 +309,8 @@ define i32 @dont_fold_srem_power_of_two(i32 %x) nounwind {
 ;
 ; RV64I-LABEL: dont_fold_srem_power_of_two:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sraiw a1, a0, 31
-; RV64I-NEXT:    srliw a1, a1, 26
+; RV64I-NEXT:    srai a1, a0, 31
+; RV64I-NEXT:    srli a1, a1, 26
 ; RV64I-NEXT:    add a1, a0, a1
 ; RV64I-NEXT:    andi a1, a1, -64
 ; RV64I-NEXT:    subw a0, a0, a1
@@ -318,8 +318,8 @@ define i32 @dont_fold_srem_power_of_two(i32 %x) nounwind {
 ;
 ; RV64IM-LABEL: dont_fold_srem_power_of_two:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    sraiw a1, a0, 31
-; RV64IM-NEXT:    srliw a1, a1, 26
+; RV64IM-NEXT:    srai a1, a0, 31
+; RV64IM-NEXT:    srli a1, a1, 26
 ; RV64IM-NEXT:    add a1, a0, a1
 ; RV64IM-NEXT:    andi a1, a1, -64
 ; RV64IM-NEXT:    subw a0, a0, a1
@@ -362,8 +362,8 @@ define i32 @dont_fold_srem_i32_smax(i32 %x) nounwind {
 ;
 ; RV64I-LABEL: dont_fold_srem_i32_smax:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sraiw a1, a0, 31
-; RV64I-NEXT:    srliw a1, a1, 1
+; RV64I-NEXT:    srai a1, a0, 31
+; RV64I-NEXT:    srli a1, a1, 1
 ; RV64I-NEXT:    add a1, a0, a1
 ; RV64I-NEXT:    lui a2, 524288
 ; RV64I-NEXT:    and a1, a1, a2
@@ -372,8 +372,8 @@ define i32 @dont_fold_srem_i32_smax(i32 %x) nounwind {
 ;
 ; RV64IM-LABEL: dont_fold_srem_i32_smax:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    sraiw a1, a0, 31
-; RV64IM-NEXT:    srliw a1, a1, 1
+; RV64IM-NEXT:    srai a1, a0, 31
+; RV64IM-NEXT:    srli a1, a1, 1
 ; RV64IM-NEXT:    add a1, a0, a1
 ; RV64IM-NEXT:    lui a2, 524288
 ; RV64IM-NEXT:    and a1, a1, a2
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index dcf701be76f62a9..6ed352b51f25459 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -34,7 +34,7 @@ define i1 @test_srem_odd(i29 %X) nounwind {
 ; RV64-NEXT:    addiw a1, a1, 331
 ; RV64-NEXT:    call __muldi3@plt
 ; RV64-NEXT:    lui a1, 662
-; RV64-NEXT:    addiw a1, a1, -83
+; RV64-NEXT:    addi a1, a1, -83
 ; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    slli a0, a0, 35
 ; RV64-NEXT:    srli a0, a0, 35
@@ -63,10 +63,10 @@ define i1 @test_srem_odd(i29 %X) nounwind {
 ; RV64M-LABEL: test_srem_odd:
 ; RV64M:       # %bb.0:
 ; RV64M-NEXT:    lui a1, 128424
-; RV64M-NEXT:    addiw a1, a1, 331
+; RV64M-NEXT:    addi a1, a1, 331
 ; RV64M-NEXT:    mul a0, a0, a1
 ; RV64M-NEXT:    lui a1, 662
-; RV64M-NEXT:    addiw a1, a1, -83
+; RV64M-NEXT:    addi a1, a1, -83
 ; RV64M-NEXT:    add a0, a0, a1
 ; RV64M-NEXT:    slli a0, a0, 35
 ; RV64M-NEXT:    srli a0, a0, 35
@@ -93,10 +93,10 @@ define i1 @test_srem_odd(i29 %X) nounwind {
 ; RV64MV-LABEL: test_srem_odd:
 ; RV64MV:       # %bb.0:
 ; RV64MV-NEXT:    lui a1, 128424
-; RV64MV-NEXT:    addiw a1, a1, 331
+; RV64MV-NEXT:    addi a1, a1, 331
 ; RV64MV-NEXT:    mul a0, a0, a1
 ; RV64MV-NEXT:    lui a1, 662
-; RV64MV-NEXT:    addiw a1, a1, -83
+; RV64MV-NEXT:    addi a1, a1, -83
 ; RV64MV-NEXT:    add a0, a0, a1
 ; RV64MV-NEXT:    slli a0, a0, 35
 ; RV64MV-NEXT:    srli a0, a0, 35
diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
index b5f1efa4b160ba9..2e0c541311e10b2 100644
--- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
@@ -909,7 +909,7 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
 ; RV64IM-NEXT:    srli a3, a3, 11
 ; RV64IM-NEXT:    add a3, a3, a5
 ; RV64IM-NEXT:    lui a5, 1
-; RV64IM-NEXT:    addiw a5, a5, 1327
+; RV64IM-NEXT:    addi a5, a5, 1327
 ; RV64IM-NEXT:    mul a3, a3, a5
 ; RV64IM-NEXT:    subw a4, a4, a3
 ; RV64IM-NEXT:    sh zero, 0(a0)
@@ -1053,7 +1053,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
 ; RV64IM-NEXT:    srli a3, a3, 11
 ; RV64IM-NEXT:    add a3, a3, a5
 ; RV64IM-NEXT:    lui a5, 1
-; RV64IM-NEXT:    addiw a5, a5, 1327
+; RV64IM-NEXT:    addi a5, a5, 1327
 ; RV64IM-NEXT:    mul a3, a3, a5
 ; RV64IM-NEXT:    subw a4, a4, a3
 ; RV64IM-NEXT:    srli a3, a1, 49
diff --git a/llvm/test/CodeGen/RISCV/urem-lkk.ll b/llvm/test/CodeGen/RISCV/urem-lkk.ll
index 3d181c3a30d0947..1b2cc1398ec11cf 100644
--- a/llvm/test/CodeGen/RISCV/urem-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-lkk.ll
@@ -44,7 +44,7 @@ define i32 @fold_urem_positive_odd(i32 %x) nounwind {
 ; RV64IM:       # %bb.0:
 ; RV64IM-NEXT:    slli a1, a0, 32
 ; RV64IM-NEXT:    lui a2, 364242
-; RV64IM-NEXT:    addiw a2, a2, 777
+; RV64IM-NEXT:    addi a2, a2, 777
 ; RV64IM-NEXT:    slli a2, a2, 32
 ; RV64IM-NEXT:    mulhu a1, a1, a2
 ; RV64IM-NEXT:    srli a1, a1, 32
@@ -94,7 +94,7 @@ define i32 @fold_urem_positive_even(i32 %x) nounwind {
 ; RV64IM:       # %bb.0:
 ; RV64IM-NEXT:    slli a1, a0, 32
 ; RV64IM-NEXT:    lui a2, 1012964
-; RV64IM-NEXT:    addiw a2, a2, -61
+; RV64IM-NEXT:    addi a2, a2, -61
 ; RV64IM-NEXT:    slli a2, a2, 32
 ; RV64IM-NEXT:    mulhu a1, a1, a2
 ; RV64IM-NEXT:    srli a1, a1, 42
@@ -170,7 +170,7 @@ define i32 @combine_urem_udiv(i32 %x) nounwind {
 ; RV64IM:       # %bb.0:
 ; RV64IM-NEXT:    slli a1, a0, 32
 ; RV64IM-NEXT:    lui a2, 364242
-; RV64IM-NEXT:    addiw a2, a2, 777
+; RV64IM-NEXT:    addi a2, a2, 777
 ; RV64IM-NEXT:    slli a2, a2, 32
 ; RV64IM-NEXT:    mulhu a1, a1, a2
 ; RV64IM-NEXT:    srli a1, a1, 32
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index 456d98fd4e47ffe..df30946218dfad8 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -48,7 +48,7 @@ define i1 @test_urem_odd(i13 %X) nounwind {
 ; RV64M-LABEL: test_urem_odd:
 ; RV64M:       # %bb.0:
 ; RV64M-NEXT:    lui a1, 1
-; RV64M-NEXT:    addiw a1, a1, -819
+; RV64M-NEXT:    addi a1, a1, -819
 ; RV64M-NEXT:    mul a0, a0, a1
 ; RV64M-NEXT:    slli a0, a0, 51
 ; RV64M-NEXT:    srli a0, a0, 51
@@ -68,7 +68,7 @@ define i1 @test_urem_odd(i13 %X) nounwind {
 ; RV64MV-LABEL: test_urem_odd:
 ; RV64MV:       # %bb.0:
 ; RV64MV-NEXT:    lui a1, 1
-; RV64MV-NEXT:    addiw a1, a1, -819
+; RV64MV-NEXT:    addi a1, a1, -819
 ; RV64MV-NEXT:    mul a0, a0, a1
 ; RV64MV-NEXT:    slli a0, a0, 51
 ; RV64MV-NEXT:    srli a0, a0, 51
@@ -139,7 +139,7 @@ define i1 @test_urem_even(i27 %X) nounwind {
 ; RV64M-LABEL: test_urem_even:
 ; RV64M:       # %bb.0:
 ; RV64M-NEXT:    lui a1, 28087
-; RV64M-NEXT:    addiw a1, a1, -585
+; RV64M-NEXT:    addi a1, a1, -585
 ; RV64M-NEXT:    mul a0, a0, a1
 ; RV64M-NEXT:    slli a1, a0, 26
 ; RV64M-NEXT:    slli a0, a0, 37
@@ -171,7 +171,7 @@ define i1 @test_urem_even(i27 %X) nounwind {
 ; RV64MV-LABEL: test_urem_even:
 ; RV64MV:       # %bb.0:
 ; RV64MV-NEXT:    lui a1, 28087
-; RV64MV-NEXT:    addiw a1, a1, -585
+; RV64MV-NEXT:    addi a1, a1, -585
 ; RV64MV-NEXT:    mul a0, a0, a1
 ; RV64MV-NEXT:    slli a1, a0, 26
 ; RV64MV-NEXT:    slli a0, a0, 37
@@ -405,18 +405,18 @@ define void @test_urem_vec(ptr %X) nounwind {
 ; RV64-NEXT:    li a1, 1463
 ; RV64-NEXT:    mv a0, s2
 ; RV64-NEXT:    call __muldi3@plt
-; RV64-NEXT:    addiw a0, a0, -1463
+; RV64-NEXT:    addi a0, a0, -1463
 ; RV64-NEXT:    andi a0, a0, 2047
 ; RV64-NEXT:    sltiu s2, a0, 293
 ; RV64-NEXT:    li a1, 819
 ; RV64-NEXT:    mv a0, s1
 ; RV64-NEXT:    call __muldi3@plt
-; RV64-NEXT:    addiw a0, a0, -1638
+; RV64-NEXT:    addi a0, a0, -1638
 ; RV64-NEXT:    andi a0, a0, 2047
 ; RV64-NEXT:    sltiu a0, a0, 2
-; RV64-NEXT:    addiw s3, s3, -1
+; RV64-NEXT:    addi s3, s3, -1
 ; RV64-NEXT:    addi a0, a0, -1
-; RV64-NEXT:    addiw s2, s2, -1
+; RV64-NEXT:    addi s2, s2, -1
 ; RV64-NEXT:    andi a1, s3, 2047
 ; RV64-NEXT:    andi a2, s2, 2047
 ; RV64-NEXT:    slli a2, a2, 11
@@ -496,17 +496,17 @@ define void @test_urem_vec(ptr %X) nounwind {
 ; RV64M-NEXT:    sltiu a1, a1, 342
 ; RV64M-NEXT:    li a4, 1463
 ; RV64M-NEXT:    mul a3, a3, a4
-; RV64M-NEXT:    addiw a3, a3, -1463
+; RV64M-NEXT:    addi a3, a3, -1463
 ; RV64M-NEXT:    andi a3, a3, 2047
 ; RV64M-NEXT:    sltiu a3, a3, 293
 ; RV64M-NEXT:    li a4, 819
 ; RV64M-NEXT:    mul a2, a2, a4
-; RV64M-NEXT:    addiw a2, a2, -1638
+; RV64M-NEXT:    addi a2, a2, -1638
 ; RV64M-NEXT:    andi a2, a2, 2047
 ; RV64M-NEXT:    sltiu a2, a2, 2
-; RV64M-NEXT:    addiw a1, a1, -1
+; RV64M-NEXT:    addi a1, a1, -1
 ; RV64M-NEXT:    addi a2, a2, -1
-; RV64M-NEXT:    addiw a3, a3, -1
+; RV64M-NEXT:    addi a3, a3, -1
 ; RV64M-NEXT:    andi a1, a1, 2047
 ; RV64M-NEXT:    andi a3, a3, 2047
 ; RV64M-NEXT:    slli a3, a3, 11
@@ -604,7 +604,7 @@ define void @test_urem_vec(ptr %X) nounwind {
 ; RV64MV-NEXT:    vmul.vv v8, v8, v9
 ; RV64MV-NEXT:    vadd.vv v9, v8, v8
 ; RV64MV-NEXT:    lui a2, 41121
-; RV64MV-NEXT:    addiw a2, a2, -1527
+; RV64MV-NEXT:    addi a2, a2, -1527
 ; RV64MV-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; RV64MV-NEXT:    vmv.s.x v10, a2
 ; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
index a38ae17f19df385..ac67b9005b3d070 100644
--- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
@@ -755,7 +755,7 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
 ; RV64IM-NEXT:    subw a1, a1, a3
 ; RV64IM-NEXT:    mulhu a3, a4, a5
 ; RV64IM-NEXT:    lui a5, 1
-; RV64IM-NEXT:    addiw a5, a5, 1327
+; RV64IM-NEXT:    addi a5, a5, 1327
 ; RV64IM-NEXT:    mul a3, a3, a5
 ; RV64IM-NEXT:    subw a4, a4, a3
 ; RV64IM-NEXT:    sh zero, 0(a0)
diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll
index e8035dd226bf6ce..59aa1d9ae28932a 100644
--- a/llvm/test/CodeGen/RISCV/vararg.ll
+++ b/llvm/test/CodeGen/RISCV/vararg.ll
@@ -566,7 +566,7 @@ define i64 @va2(ptr %fmt, ...) nounwind {
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    addi a0, sp, 24
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    sd a0, 8(sp)
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    lw a0, 8(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT:    addiw a0, a0, 7
+; LP64-LP64F-LP64D-FPELIM-NEXT:    addi a0, a0, 7
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    slli a1, a0, 32
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    srli a1, a1, 32
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    addi a1, a1, 8
@@ -593,7 +593,7 @@ define i64 @va2(ptr %fmt, ...) nounwind {
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi a0, s0, 8
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    sd a0, -24(s0)
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    lw a0, -24(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT:    addiw a0, a0, 7
+; LP64-LP64F-LP64D-WITHFP-NEXT:    addi a0, a0, 7
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    slli a1, a0, 32
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    srli a1, a1, 32
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi a1, a1, 8
@@ -888,7 +888,7 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    sd a0, 8(sp)
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    lw a0, 8(sp)
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    sd a2, 16(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT:    addiw a0, a0, 7
+; LP64-LP64F-LP64D-FPELIM-NEXT:    addi a0, a0, 7
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    slli a2, a0, 32
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    srli a2, a2, 32
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    addi a2, a2, 8
@@ -915,7 +915,7 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    sd a0, -24(s0)
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    lw a0, -24(s0)
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    sd a2, 0(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT:    addiw a0, a0, 7
+; LP64-LP64F-LP64D-WITHFP-NEXT:    addi a0, a0, 7
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    slli a2, a0, 32
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    srli a2, a2, 32
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi a2, a2, 8
diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index 754237c3456dc03..85d28122537ea9c 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -193,7 +193,7 @@ define zeroext i1 @saddo4.i32(i32 signext %v1, ptr %res) {
 ; RV64-LABEL: saddo4.i32:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    lui a2, 4096
-; RV64-NEXT:    addiw a2, a2, -1
+; RV64-NEXT:    addi a2, a2, -1
 ; RV64-NEXT:    addw a2, a0, a2
 ; RV64-NEXT:    slt a0, a2, a0
 ; RV64-NEXT:    sw a2, 0(a1)
@@ -211,7 +211,7 @@ define zeroext i1 @saddo4.i32(i32 signext %v1, ptr %res) {
 ; RV64ZBA-LABEL: saddo4.i32:
 ; RV64ZBA:       # %bb.0: # %entry
 ; RV64ZBA-NEXT:    lui a2, 4096
-; RV64ZBA-NEXT:    addiw a2, a2, -1
+; RV64ZBA-NEXT:    addi a2, a2, -1
 ; RV64ZBA-NEXT:    addw a2, a0, a2
 ; RV64ZBA-NEXT:    slt a0, a2, a0
 ; RV64ZBA-NEXT:    sw a2, 0(a1)
@@ -229,7 +229,7 @@ define zeroext i1 @saddo4.i32(i32 signext %v1, ptr %res) {
 ; RV64ZICOND-LABEL: saddo4.i32:
 ; RV64ZICOND:       # %bb.0: # %entry
 ; RV64ZICOND-NEXT:    lui a2, 4096
-; RV64ZICOND-NEXT:    addiw a2, a2, -1
+; RV64ZICOND-NEXT:    addi a2, a2, -1
 ; RV64ZICOND-NEXT:    addw a2, a0, a2
 ; RV64ZICOND-NEXT:    slt a0, a2, a0
 ; RV64ZICOND-NEXT:    sw a2, 0(a1)
