[llvm] [RISCV] Strip W suffix from ADDIW, SRLIW, and SRAIW (PR #68425)
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 6 08:43:27 PDT 2023
https://github.com/preames created https://github.com/llvm/llvm-project/pull/68425
The original motivation of this change was simply to reduce test duplication. As can be seen in the (massive) test delta, we have many tests whose output differs only due to the use of addi on rv32 vs addiw on rv64 when the high bits are don't-cares.
However, after reading the ISA specification, I believe this to also be a compressibility optimization. There don't seem to be compressed versions of these instructions (or of slliw, despite what the previous comment says), so using the non-W variant should allow the formation of more compressed instructions.
As an aside, we don't need to worry about the non-zero immediate restriction on the compressed variants because we're not directly forming the compressed variants. If we happen to get a zero immediate for, e.g., the ADDI, then either a later optimization will strip the useless instruction or the encoder is responsible for not compressing it.
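To make the effect concrete, here is a small hand-written example (not taken from the patch) of the rewrite stripWSuffixes performs; the lui/addiw pair is the pattern that repeats throughout the atomics tests below, and the register and immediate choices are illustrative only:

    # Before: materialize 0xffff with the W-suffixed add
    lui    a1, 16
    addiw  a1, a1, -1        # a1 = 0xffff either way, so the W form buys nothing

    # After stripping the suffix
    lui    a1, 16
    addi   a1, a1, -1        # same result; now a candidate for the 2-byte c.addi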
From 0606f00b16c76c059e78dc8f57c01db40dbac621 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Fri, 6 Oct 2023 07:51:03 -0700
Subject: [PATCH] [RISCV] Strip W suffix from ADDIW, SRLIW, and SRAIW
The original motivation of this change was simply to reduce test duplication. As can be seen in the (massive) test delta, we have many tests whose output differs only due to the use of addi on rv32 vs addiw on rv64 when the high bits are don't-cares.
However, after reading the ISA specification, I believe this to also be a compressibility optimization. There don't seem to be compressed versions of these instructions (or of slliw, despite what the previous comment says), so using the non-W variant should allow the formation of more compressed instructions.
As an aside, we don't need to worry about the non-zero immediate restriction on the compressed variants because we're not directly forming the compressed variants. If we happen to get a zero immediate for, e.g., the ADDI, then either a later optimization will strip the useless instruction or the encoder is responsible for not compressing it.
---
llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp | 14 +-
.../CodeGen/RISCV/GlobalISel/alu-roundtrip.ll | 4 +-
llvm/test/CodeGen/RISCV/add-before-shl.ll | 12 +-
llvm/test/CodeGen/RISCV/add-imm.ll | 20 +-
llvm/test/CodeGen/RISCV/addimm-mulimm.ll | 56 +-
llvm/test/CodeGen/RISCV/and.ll | 2 +-
llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll | 32 +-
llvm/test/CodeGen/RISCV/atomic-rmw.ll | 182 +-
llvm/test/CodeGen/RISCV/atomic-signext.ll | 22 +-
.../CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll | 4 +-
llvm/test/CodeGen/RISCV/bfloat-convert.ll | 8 +-
llvm/test/CodeGen/RISCV/bfloat.ll | 8 +-
llvm/test/CodeGen/RISCV/bittest.ll | 8 +-
llvm/test/CodeGen/RISCV/bswap-bitreverse.ll | 6 +-
llvm/test/CodeGen/RISCV/calling-conv-half.ll | 2 +-
llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll | 24 +-
.../CodeGen/RISCV/ctz_zero_return_test.ll | 16 +-
llvm/test/CodeGen/RISCV/div-by-constant.ll | 4 +-
llvm/test/CodeGen/RISCV/div-pow2.ll | 28 +-
llvm/test/CodeGen/RISCV/div.ll | 20 +-
llvm/test/CodeGen/RISCV/double-convert.ll | 22 +-
.../early-clobber-tied-def-subreg-liveness.ll | 2 +-
llvm/test/CodeGen/RISCV/float-convert.ll | 18 +-
.../test/CodeGen/RISCV/fold-addi-loadstore.ll | 4 +-
llvm/test/CodeGen/RISCV/half-convert.ll | 46 +-
.../CodeGen/RISCV/hoist-global-addr-base.ll | 44 +-
llvm/test/CodeGen/RISCV/iabs.ll | 6 +-
llvm/test/CodeGen/RISCV/imm.ll | 20 +-
.../RISCV/lack-of-signed-truncation-check.ll | 36 +-
.../CodeGen/RISCV/machine-outliner-throw.ll | 4 +-
llvm/test/CodeGen/RISCV/memcpy.ll | 6 +-
llvm/test/CodeGen/RISCV/neg-abs.ll | 10 +-
.../test/CodeGen/RISCV/overflow-intrinsics.ll | 12 +-
llvm/test/CodeGen/RISCV/rem.ll | 18 +-
.../CodeGen/RISCV/riscv-codegenprepare-asm.ll | 8 +-
.../test/CodeGen/RISCV/rv64i-demanded-bits.ll | 6 +-
.../CodeGen/RISCV/rv64i-exhaustive-w-insts.ll | 16 +-
llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll | 4 +-
.../RISCV/rv64i-w-insts-legalization.ll | 6 +-
llvm/test/CodeGen/RISCV/rv64xtheadbb.ll | 10 +-
llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll | 2 +-
llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll | 4 +-
llvm/test/CodeGen/RISCV/rv64zbb.ll | 28 +-
llvm/test/CodeGen/RISCV/rv64zbkb.ll | 2 +-
.../CodeGen/RISCV/rvv/bitreverse-sdnode.ll | 1055 ++--
llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll | 2655 +++-----
llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll | 230 +-
llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll | 460 +-
llvm/test/CodeGen/RISCV/rvv/combine-sats.ll | 47 +-
llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll | 2237 +++----
llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll | 391 +-
llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll | 785 +--
llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll | 1649 ++---
llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll | 1919 ++----
llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll | 2086 ++-----
.../CodeGen/RISCV/rvv/extractelt-int-rv64.ll | 4 +-
.../RISCV/rvv/fixed-vectors-bitreverse-vp.ll | 2005 ++----
.../RISCV/rvv/fixed-vectors-bitreverse.ll | 42 +-
.../RISCV/rvv/fixed-vectors-bswap-vp.ll | 368 +-
.../CodeGen/RISCV/rvv/fixed-vectors-bswap.ll | 56 +-
.../rvv/fixed-vectors-buildvec-of-binop.ll | 4 +-
.../RISCV/rvv/fixed-vectors-ctlz-vp.ll | 5504 +++++++----------
.../CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll | 815 +--
.../RISCV/rvv/fixed-vectors-ctpop-vp.ll | 1144 ++--
.../CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll | 661 +-
.../RISCV/rvv/fixed-vectors-cttz-vp.ll | 2688 +++-----
.../CodeGen/RISCV/rvv/fixed-vectors-cttz.ll | 689 +--
.../RISCV/rvv/fixed-vectors-extract.ll | 6 +-
.../RISCV/rvv/fixed-vectors-fp-interleave.ll | 151 +-
.../RISCV/rvv/fixed-vectors-fp-shuffles.ll | 74 +-
.../RISCV/rvv/fixed-vectors-int-buildvec.ll | 138 +-
.../RISCV/rvv/fixed-vectors-int-interleave.ll | 213 +-
.../RISCV/rvv/fixed-vectors-int-shuffles.ll | 163 +-
.../CodeGen/RISCV/rvv/fixed-vectors-int.ll | 987 +--
.../rvv/fixed-vectors-interleaved-access.ll | 4 +-
.../RISCV/rvv/fixed-vectors-mask-buildvec.ll | 101 +-
.../RISCV/rvv/fixed-vectors-masked-gather.ll | 2 +-
.../rvv/fixed-vectors-reduction-formation.ll | 52 +-
.../rvv/fixed-vectors-shuffle-transpose.ll | 89 +-
.../rvv/fixed-vectors-shuffle-vslide1up.ll | 31 +-
.../CodeGen/RISCV/rvv/fixed-vectors-store.ll | 119 +-
.../fixed-vectors-strided-load-store-asm.ll | 10 +-
.../test/CodeGen/RISCV/rvv/fold-vector-cmp.ll | 2 +-
.../CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 36 +-
.../RISCV/rvv/named-vector-shuffle-reverse.ll | 814 +--
llvm/test/CodeGen/RISCV/rvv/pr61561.ll | 2 +-
llvm/test/CodeGen/RISCV/rvv/select-sra.ll | 4 +-
.../test/CodeGen/RISCV/rvv/shuffle-reverse.ll | 180 +-
.../CodeGen/RISCV/rvv/sink-splat-operands.ll | 44 +-
llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll | 196 +-
llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll | 309 +-
.../RISCV/rvv/vector-interleave-fixed.ll | 115 +-
llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll | 232 +-
llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll | 341 +-
.../CodeGen/RISCV/select-binop-identity.ll | 4 +-
.../test/CodeGen/RISCV/select-constant-xor.ll | 2 +-
llvm/test/CodeGen/RISCV/select-to-and-zext.ll | 4 +-
llvm/test/CodeGen/RISCV/select.ll | 18 +-
.../CodeGen/RISCV/selectcc-to-shiftand.ll | 19 +-
llvm/test/CodeGen/RISCV/sextw-removal.ll | 6 +-
llvm/test/CodeGen/RISCV/shl-demanded.ll | 2 +-
llvm/test/CodeGen/RISCV/shlimm-addimm.ll | 20 +-
.../CodeGen/RISCV/signed-truncation-check.ll | 36 +-
llvm/test/CodeGen/RISCV/srem-lkk.ll | 26 +-
.../CodeGen/RISCV/srem-seteq-illegal-types.ll | 10 +-
llvm/test/CodeGen/RISCV/srem-vector-lkk.ll | 4 +-
llvm/test/CodeGen/RISCV/urem-lkk.ll | 6 +-
.../CodeGen/RISCV/urem-seteq-illegal-types.ll | 26 +-
llvm/test/CodeGen/RISCV/urem-vector-lkk.ll | 2 +-
llvm/test/CodeGen/RISCV/vararg.ll | 8 +-
llvm/test/CodeGen/RISCV/xaluo.ll | 6 +-
113 files changed, 11518 insertions(+), 21420 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index 439a1bb6e1e69d2..3a756db977904b6 100644
--- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
+++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -12,11 +12,12 @@
// extended bits aren't consumed or because the input was already sign extended
// by an earlier instruction.
//
-// Then it removes the -w suffix from addw, slliw and mulw instructions
-// whenever all users are dependent only on the lower word of the result of the
-// instruction. We do this only for addw, slliw, and mulw because the -w forms
-// are less compressible: c.add and c.slli have a larger register encoding than
-// their w counterparts, and there's no compressible version of mulw.
+// Then it removes the -w suffix from opw instructions whenever all users are
+// dependent only on the lower word of the result of the instruction. This is
+// profitable for addw because c.add has a larger register encoding than c.addw.
+// For the remaining opw instructions, there is no compressed w variant. This
+// transform also has the side effect of making RV32 and RV64 codegen for
+// 32-bit constants match, which helps reduce check duplication in LIT tests.
//
//===---------------------------------------------------------------------===//
@@ -661,8 +662,11 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF,
default:
continue;
case RISCV::ADDW: Opc = RISCV::ADD; break;
+ case RISCV::ADDIW: Opc = RISCV::ADDI; break;
case RISCV::MULW: Opc = RISCV::MUL; break;
case RISCV::SLLIW: Opc = RISCV::SLLI; break;
+ case RISCV::SRLIW: Opc = RISCV::SRLI; break;
+ case RISCV::SRAIW: Opc = RISCV::SRAI; break;
}
if (hasAllWUsers(MI, ST, MRI)) {
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll b/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
index c503d6541b0a577..16a81b79b2f3fc2 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
@@ -34,9 +34,9 @@ define i32 @add_i8_signext_i32(i8 %a, i8 %b) {
; RV64IM-LABEL: add_i8_signext_i32:
; RV64IM: # %bb.0: # %entry
; RV64IM-NEXT: slli a0, a0, 24
-; RV64IM-NEXT: sraiw a0, a0, 24
+; RV64IM-NEXT: srai a0, a0, 24
; RV64IM-NEXT: slli a1, a1, 24
-; RV64IM-NEXT: sraiw a1, a1, 24
+; RV64IM-NEXT: srai a1, a1, 24
; RV64IM-NEXT: addw a0, a0, a1
; RV64IM-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/RISCV/add-before-shl.ll b/llvm/test/CodeGen/RISCV/add-before-shl.ll
index a41664fde38581c..274f1cef49aa955 100644
--- a/llvm/test/CodeGen/RISCV/add-before-shl.ll
+++ b/llvm/test/CodeGen/RISCV/add-before-shl.ll
@@ -25,7 +25,7 @@ define signext i32 @add_small_const(i32 signext %a) nounwind {
;
; RV64I-LABEL: add_small_const:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 1
+; RV64I-NEXT: addi a0, a0, 1
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: jalr zero, 0(ra)
@@ -39,7 +39,7 @@ define signext i32 @add_small_const(i32 signext %a) nounwind {
;
; RV64C-LABEL: add_small_const:
; RV64C: # %bb.0:
-; RV64C-NEXT: c.addiw a0, 1
+; RV64C-NEXT: c.addi a0, 1
; RV64C-NEXT: c.slli a0, 56
; RV64C-NEXT: c.srai a0, 56
; RV64C-NEXT: c.jr ra
@@ -78,7 +78,7 @@ define signext i32 @add_large_const(i32 signext %a) nounwind {
; RV64C-LABEL: add_large_const:
; RV64C: # %bb.0:
; RV64C-NEXT: c.lui a1, 1
-; RV64C-NEXT: c.addiw a1, -1
+; RV64C-NEXT: c.addi a1, -1
; RV64C-NEXT: c.add a0, a1
; RV64C-NEXT: c.slli a0, 48
; RV64C-NEXT: c.srai a0, 48
@@ -118,7 +118,7 @@ define signext i32 @add_huge_const(i32 signext %a) nounwind {
; RV64C-LABEL: add_huge_const:
; RV64C: # %bb.0:
; RV64C-NEXT: c.lui a1, 8
-; RV64C-NEXT: c.addiw a1, -1
+; RV64C-NEXT: c.addi a1, -1
; RV64C-NEXT: c.add a0, a1
; RV64C-NEXT: c.slli a0, 48
; RV64C-NEXT: c.srai a0, 48
@@ -139,7 +139,7 @@ define signext i24 @add_non_machine_type(i24 signext %a) nounwind {
;
; RV64I-LABEL: add_non_machine_type:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 256
+; RV64I-NEXT: addi a0, a0, 256
; RV64I-NEXT: slli a0, a0, 52
; RV64I-NEXT: srai a0, a0, 40
; RV64I-NEXT: jalr zero, 0(ra)
@@ -153,7 +153,7 @@ define signext i24 @add_non_machine_type(i24 signext %a) nounwind {
;
; RV64C-LABEL: add_non_machine_type:
; RV64C: # %bb.0:
-; RV64C-NEXT: addiw a0, a0, 256
+; RV64C-NEXT: addi a0, a0, 256
; RV64C-NEXT: c.slli a0, 52
; RV64C-NEXT: c.srai a0, 40
; RV64C-NEXT: c.jr ra
diff --git a/llvm/test/CodeGen/RISCV/add-imm.ll b/llvm/test/CodeGen/RISCV/add-imm.ll
index 700fec0192d3e74..52751f1c224211f 100644
--- a/llvm/test/CodeGen/RISCV/add-imm.ll
+++ b/llvm/test/CodeGen/RISCV/add-imm.ll
@@ -29,7 +29,7 @@ define i32 @add_positive_low_bound_accept(i32 %a) nounwind {
;
; RV64I-LABEL: add_positive_low_bound_accept:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 2047
+; RV64I-NEXT: addi a0, a0, 2047
; RV64I-NEXT: addiw a0, a0, 1
; RV64I-NEXT: ret
%1 = add i32 %a, 2048
@@ -45,7 +45,7 @@ define i32 @add_positive_high_bound_accept(i32 %a) nounwind {
;
; RV64I-LABEL: add_positive_high_bound_accept:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 2047
+; RV64I-NEXT: addi a0, a0, 2047
; RV64I-NEXT: addiw a0, a0, 2047
; RV64I-NEXT: ret
%1 = add i32 %a, 4094
@@ -63,7 +63,7 @@ define i32 @add_positive_high_bound_reject(i32 %a) nounwind {
; RV64I-LABEL: add_positive_high_bound_reject:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a1, 1
-; RV64I-NEXT: addiw a1, a1, -1
+; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
%1 = add i32 %a, 4095
@@ -93,7 +93,7 @@ define i32 @add_negative_high_bound_accept(i32 %a) nounwind {
;
; RV64I-LABEL: add_negative_high_bound_accept:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, -2048
+; RV64I-NEXT: addi a0, a0, -2048
; RV64I-NEXT: addiw a0, a0, -1
; RV64I-NEXT: ret
%1 = add i32 %a, -2049
@@ -109,7 +109,7 @@ define i32 @add_negative_low_bound_accept(i32 %a) nounwind {
;
; RV64I-LABEL: add_negative_low_bound_accept:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, -2048
+; RV64I-NEXT: addi a0, a0, -2048
; RV64I-NEXT: addiw a0, a0, -2048
; RV64I-NEXT: ret
%1 = add i32 %a, -4096
@@ -127,7 +127,7 @@ define i32 @add_negative_low_bound_reject(i32 %a) nounwind {
; RV64I-LABEL: add_negative_low_bound_reject:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a1, 1048575
-; RV64I-NEXT: addiw a1, a1, -1
+; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
%1 = add i32 %a, -4097
@@ -143,7 +143,7 @@ define i32 @add32_accept(i32 %a) nounwind {
;
; RV64I-LABEL: add32_accept:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 2047
+; RV64I-NEXT: addi a0, a0, 2047
; RV64I-NEXT: addiw a0, a0, 952
; RV64I-NEXT: ret
%1 = add i32 %a, 2999
@@ -159,7 +159,7 @@ define signext i32 @add32_sext_accept(i32 signext %a) nounwind {
;
; RV64I-LABEL: add32_sext_accept:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 2047
+; RV64I-NEXT: addi a0, a0, 2047
; RV64I-NEXT: addiw a0, a0, 952
; RV64I-NEXT: ret
%1 = add i32 %a, 2999
@@ -178,7 +178,7 @@ define signext i32 @add32_sext_reject_on_rv64(i32 signext %a) nounwind {
;
; RV64I-LABEL: add32_sext_reject_on_rv64:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 2047
+; RV64I-NEXT: addi a0, a0, 2047
; RV64I-NEXT: addiw a0, a0, 953
; RV64I-NEXT: lui a1, %hi(gv0)
; RV64I-NEXT: sw a0, %lo(gv0)(a1)
@@ -231,7 +231,7 @@ define void @add32_reject() nounwind {
; RV64I-NEXT: lui a2, %hi(gb)
; RV64I-NEXT: lw a3, %lo(gb)(a2)
; RV64I-NEXT: lui a4, 1
-; RV64I-NEXT: addiw a4, a4, -1096
+; RV64I-NEXT: addi a4, a4, -1096
; RV64I-NEXT: add a1, a1, a4
; RV64I-NEXT: add a3, a3, a4
; RV64I-NEXT: sw a1, %lo(ga)(a0)
diff --git a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
index d1bc480455dd35f..48fa69e10456563 100644
--- a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
+++ b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
@@ -84,7 +84,7 @@ define i32 @add_mul_combine_accept_b1(i32 %x) {
; RV64IMB-NEXT: li a1, 23
; RV64IMB-NEXT: mul a0, a0, a1
; RV64IMB-NEXT: lui a1, 50
-; RV64IMB-NEXT: addiw a1, a1, 1119
+; RV64IMB-NEXT: addi a1, a1, 1119
; RV64IMB-NEXT: addw a0, a0, a1
; RV64IMB-NEXT: ret
%tmp0 = add i32 %x, 8953
@@ -107,7 +107,7 @@ define signext i32 @add_mul_combine_accept_b2(i32 signext %x) {
; RV64IMB-NEXT: li a1, 23
; RV64IMB-NEXT: mul a0, a0, a1
; RV64IMB-NEXT: lui a1, 50
-; RV64IMB-NEXT: addiw a1, a1, 1119
+; RV64IMB-NEXT: addi a1, a1, 1119
; RV64IMB-NEXT: addw a0, a0, a1
; RV64IMB-NEXT: ret
%tmp0 = add i32 %x, 8953
@@ -153,7 +153,7 @@ define i32 @add_mul_combine_reject_a1(i32 %x) {
;
; RV64IMB-LABEL: add_mul_combine_reject_a1:
; RV64IMB: # %bb.0:
-; RV64IMB-NEXT: addiw a0, a0, 1971
+; RV64IMB-NEXT: addi a0, a0, 1971
; RV64IMB-NEXT: li a1, 29
; RV64IMB-NEXT: mulw a0, a0, a1
; RV64IMB-NEXT: ret
@@ -172,7 +172,7 @@ define signext i32 @add_mul_combine_reject_a2(i32 signext %x) {
;
; RV64IMB-LABEL: add_mul_combine_reject_a2:
; RV64IMB: # %bb.0:
-; RV64IMB-NEXT: addiw a0, a0, 1971
+; RV64IMB-NEXT: addi a0, a0, 1971
; RV64IMB-NEXT: li a1, 29
; RV64IMB-NEXT: mulw a0, a0, a1
; RV64IMB-NEXT: ret
@@ -217,7 +217,7 @@ define i32 @add_mul_combine_reject_c1(i32 %x) {
;
; RV64IMB-LABEL: add_mul_combine_reject_c1:
; RV64IMB: # %bb.0:
-; RV64IMB-NEXT: addiw a0, a0, 1000
+; RV64IMB-NEXT: addi a0, a0, 1000
; RV64IMB-NEXT: sh3add a1, a0, a0
; RV64IMB-NEXT: sh3add a0, a1, a0
; RV64IMB-NEXT: sext.w a0, a0
@@ -237,7 +237,7 @@ define signext i32 @add_mul_combine_reject_c2(i32 signext %x) {
;
; RV64IMB-LABEL: add_mul_combine_reject_c2:
; RV64IMB: # %bb.0:
-; RV64IMB-NEXT: addiw a0, a0, 1000
+; RV64IMB-NEXT: addi a0, a0, 1000
; RV64IMB-NEXT: sh3add a1, a0, a0
; RV64IMB-NEXT: sh3add a0, a1, a0
; RV64IMB-NEXT: sext.w a0, a0
@@ -349,7 +349,7 @@ define i32 @add_mul_combine_reject_e1(i32 %x) {
;
; RV64IMB-LABEL: add_mul_combine_reject_e1:
; RV64IMB: # %bb.0:
-; RV64IMB-NEXT: addiw a0, a0, 1971
+; RV64IMB-NEXT: addi a0, a0, 1971
; RV64IMB-NEXT: li a1, 29
; RV64IMB-NEXT: mulw a0, a0, a1
; RV64IMB-NEXT: ret
@@ -368,7 +368,7 @@ define signext i32 @add_mul_combine_reject_e2(i32 signext %x) {
;
; RV64IMB-LABEL: add_mul_combine_reject_e2:
; RV64IMB: # %bb.0:
-; RV64IMB-NEXT: addiw a0, a0, 1971
+; RV64IMB-NEXT: addi a0, a0, 1971
; RV64IMB-NEXT: li a1, 29
; RV64IMB-NEXT: mulw a0, a0, a1
; RV64IMB-NEXT: ret
@@ -414,7 +414,7 @@ define i32 @add_mul_combine_reject_f1(i32 %x) {
;
; RV64IMB-LABEL: add_mul_combine_reject_f1:
; RV64IMB: # %bb.0:
-; RV64IMB-NEXT: addiw a0, a0, 1972
+; RV64IMB-NEXT: addi a0, a0, 1972
; RV64IMB-NEXT: li a1, 29
; RV64IMB-NEXT: mul a0, a0, a1
; RV64IMB-NEXT: addiw a0, a0, 11
@@ -435,7 +435,7 @@ define signext i32 @add_mul_combine_reject_f2(i32 signext %x) {
;
; RV64IMB-LABEL: add_mul_combine_reject_f2:
; RV64IMB: # %bb.0:
-; RV64IMB-NEXT: addiw a0, a0, 1972
+; RV64IMB-NEXT: addi a0, a0, 1972
; RV64IMB-NEXT: li a1, 29
; RV64IMB-NEXT: mul a0, a0, a1
; RV64IMB-NEXT: addiw a0, a0, 11
@@ -483,7 +483,7 @@ define i32 @add_mul_combine_reject_g1(i32 %x) {
;
; RV64IMB-LABEL: add_mul_combine_reject_g1:
; RV64IMB: # %bb.0:
-; RV64IMB-NEXT: addiw a0, a0, 100
+; RV64IMB-NEXT: addi a0, a0, 100
; RV64IMB-NEXT: sh3add a1, a0, a0
; RV64IMB-NEXT: sh3add a0, a1, a0
; RV64IMB-NEXT: addiw a0, a0, 10
@@ -504,7 +504,7 @@ define signext i32 @add_mul_combine_reject_g2(i32 signext %x) {
;
; RV64IMB-LABEL: add_mul_combine_reject_g2:
; RV64IMB: # %bb.0:
-; RV64IMB-NEXT: addiw a0, a0, 100
+; RV64IMB-NEXT: addi a0, a0, 100
; RV64IMB-NEXT: sh3add a1, a0, a0
; RV64IMB-NEXT: sh3add a0, a1, a0
; RV64IMB-NEXT: addiw a0, a0, 10
@@ -581,9 +581,9 @@ define i32 @mul3000_add8990_a(i32 %x) {
;
; RV64IMB-LABEL: mul3000_add8990_a:
; RV64IMB: # %bb.0:
-; RV64IMB-NEXT: addiw a0, a0, 3
+; RV64IMB-NEXT: addi a0, a0, 3
; RV64IMB-NEXT: lui a1, 1
-; RV64IMB-NEXT: addiw a1, a1, -1096
+; RV64IMB-NEXT: addi a1, a1, -1096
; RV64IMB-NEXT: mul a0, a0, a1
; RV64IMB-NEXT: addiw a0, a0, -10
; RV64IMB-NEXT: ret
@@ -604,9 +604,9 @@ define signext i32 @mul3000_add8990_b(i32 signext %x) {
;
; RV64IMB-LABEL: mul3000_add8990_b:
; RV64IMB: # %bb.0:
-; RV64IMB-NEXT: addiw a0, a0, 3
+; RV64IMB-NEXT: addi a0, a0, 3
; RV64IMB-NEXT: lui a1, 1
-; RV64IMB-NEXT: addiw a1, a1, -1096
+; RV64IMB-NEXT: addi a1, a1, -1096
; RV64IMB-NEXT: mul a0, a0, a1
; RV64IMB-NEXT: addiw a0, a0, -10
; RV64IMB-NEXT: ret
@@ -656,9 +656,9 @@ define i32 @mul3000_sub8990_a(i32 %x) {
;
; RV64IMB-LABEL: mul3000_sub8990_a:
; RV64IMB: # %bb.0:
-; RV64IMB-NEXT: addiw a0, a0, -3
+; RV64IMB-NEXT: addi a0, a0, -3
; RV64IMB-NEXT: lui a1, 1
-; RV64IMB-NEXT: addiw a1, a1, -1096
+; RV64IMB-NEXT: addi a1, a1, -1096
; RV64IMB-NEXT: mul a0, a0, a1
; RV64IMB-NEXT: addiw a0, a0, 10
; RV64IMB-NEXT: ret
@@ -679,9 +679,9 @@ define signext i32 @mul3000_sub8990_b(i32 signext %x) {
;
; RV64IMB-LABEL: mul3000_sub8990_b:
; RV64IMB: # %bb.0:
-; RV64IMB-NEXT: addiw a0, a0, -3
+; RV64IMB-NEXT: addi a0, a0, -3
; RV64IMB-NEXT: lui a1, 1
-; RV64IMB-NEXT: addiw a1, a1, -1096
+; RV64IMB-NEXT: addi a1, a1, -1096
; RV64IMB-NEXT: mul a0, a0, a1
; RV64IMB-NEXT: addiw a0, a0, 10
; RV64IMB-NEXT: ret
@@ -732,9 +732,9 @@ define i32 @mulneg3000_add8990_a(i32 %x) {
;
; RV64IMB-LABEL: mulneg3000_add8990_a:
; RV64IMB: # %bb.0:
-; RV64IMB-NEXT: addiw a0, a0, -3
+; RV64IMB-NEXT: addi a0, a0, -3
; RV64IMB-NEXT: lui a1, 1048575
-; RV64IMB-NEXT: addiw a1, a1, 1096
+; RV64IMB-NEXT: addi a1, a1, 1096
; RV64IMB-NEXT: mul a0, a0, a1
; RV64IMB-NEXT: addiw a0, a0, -10
; RV64IMB-NEXT: ret
@@ -755,9 +755,9 @@ define signext i32 @mulneg3000_add8990_b(i32 signext %x) {
;
; RV64IMB-LABEL: mulneg3000_add8990_b:
; RV64IMB: # %bb.0:
-; RV64IMB-NEXT: addiw a0, a0, -3
+; RV64IMB-NEXT: addi a0, a0, -3
; RV64IMB-NEXT: lui a1, 1048575
-; RV64IMB-NEXT: addiw a1, a1, 1096
+; RV64IMB-NEXT: addi a1, a1, 1096
; RV64IMB-NEXT: mul a0, a0, a1
; RV64IMB-NEXT: addiw a0, a0, -10
; RV64IMB-NEXT: ret
@@ -808,9 +808,9 @@ define i32 @mulneg3000_sub8990_a(i32 %x) {
;
; RV64IMB-LABEL: mulneg3000_sub8990_a:
; RV64IMB: # %bb.0:
-; RV64IMB-NEXT: addiw a0, a0, 3
+; RV64IMB-NEXT: addi a0, a0, 3
; RV64IMB-NEXT: lui a1, 1048575
-; RV64IMB-NEXT: addiw a1, a1, 1096
+; RV64IMB-NEXT: addi a1, a1, 1096
; RV64IMB-NEXT: mul a0, a0, a1
; RV64IMB-NEXT: addiw a0, a0, 10
; RV64IMB-NEXT: ret
@@ -831,9 +831,9 @@ define signext i32 @mulneg3000_sub8990_b(i32 signext %x) {
;
; RV64IMB-LABEL: mulneg3000_sub8990_b:
; RV64IMB: # %bb.0:
-; RV64IMB-NEXT: addiw a0, a0, 3
+; RV64IMB-NEXT: addi a0, a0, 3
; RV64IMB-NEXT: lui a1, 1048575
-; RV64IMB-NEXT: addiw a1, a1, 1096
+; RV64IMB-NEXT: addi a1, a1, 1096
; RV64IMB-NEXT: mul a0, a0, a1
; RV64IMB-NEXT: addiw a0, a0, 10
; RV64IMB-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/and.ll b/llvm/test/CodeGen/RISCV/and.ll
index 5eff422013da6a8..79e3b954c50d8d8 100644
--- a/llvm/test/CodeGen/RISCV/and.ll
+++ b/llvm/test/CodeGen/RISCV/and.ll
@@ -195,7 +195,7 @@ define i64 @and64_0x7fffffff00000000(i64 %x) {
; RV64I-LABEL: and64_0x7fffffff00000000:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a1, 524288
-; RV64I-NEXT: addiw a1, a1, -1
+; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
index f900b5161f75128..eea4cb72938af23 100644
--- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
@@ -1104,7 +1104,7 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw
; RV64IA-NEXT: andi a3, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a4, 16
-; RV64IA-NEXT: addiw a4, a4, -1
+; RV64IA-NEXT: addi a4, a4, -1
; RV64IA-NEXT: sllw a5, a4, a0
; RV64IA-NEXT: and a1, a1, a4
; RV64IA-NEXT: sllw a1, a1, a0
@@ -1206,7 +1206,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64IA-WMO-NEXT: andi a3, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a4, 16
-; RV64IA-WMO-NEXT: addiw a4, a4, -1
+; RV64IA-WMO-NEXT: addi a4, a4, -1
; RV64IA-WMO-NEXT: sllw a5, a4, a0
; RV64IA-WMO-NEXT: and a1, a1, a4
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -1230,7 +1230,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64IA-TSO-NEXT: andi a3, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a4, 16
-; RV64IA-TSO-NEXT: addiw a4, a4, -1
+; RV64IA-TSO-NEXT: addi a4, a4, -1
; RV64IA-TSO-NEXT: sllw a5, a4, a0
; RV64IA-TSO-NEXT: and a1, a1, a4
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -1332,7 +1332,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64IA-WMO-NEXT: andi a3, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a4, 16
-; RV64IA-WMO-NEXT: addiw a4, a4, -1
+; RV64IA-WMO-NEXT: addi a4, a4, -1
; RV64IA-WMO-NEXT: sllw a5, a4, a0
; RV64IA-WMO-NEXT: and a1, a1, a4
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -1356,7 +1356,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64IA-TSO-NEXT: andi a3, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a4, 16
-; RV64IA-TSO-NEXT: addiw a4, a4, -1
+; RV64IA-TSO-NEXT: addi a4, a4, -1
; RV64IA-TSO-NEXT: sllw a5, a4, a0
; RV64IA-TSO-NEXT: and a1, a1, a4
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -1458,7 +1458,7 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64IA-WMO-NEXT: andi a3, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a4, 16
-; RV64IA-WMO-NEXT: addiw a4, a4, -1
+; RV64IA-WMO-NEXT: addi a4, a4, -1
; RV64IA-WMO-NEXT: sllw a5, a4, a0
; RV64IA-WMO-NEXT: and a1, a1, a4
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -1482,7 +1482,7 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64IA-TSO-NEXT: andi a3, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a4, 16
-; RV64IA-TSO-NEXT: addiw a4, a4, -1
+; RV64IA-TSO-NEXT: addi a4, a4, -1
; RV64IA-TSO-NEXT: sllw a5, a4, a0
; RV64IA-TSO-NEXT: and a1, a1, a4
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -1584,7 +1584,7 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64IA-WMO-NEXT: andi a3, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a4, 16
-; RV64IA-WMO-NEXT: addiw a4, a4, -1
+; RV64IA-WMO-NEXT: addi a4, a4, -1
; RV64IA-WMO-NEXT: sllw a5, a4, a0
; RV64IA-WMO-NEXT: and a1, a1, a4
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -1608,7 +1608,7 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64IA-TSO-NEXT: andi a3, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a4, 16
-; RV64IA-TSO-NEXT: addiw a4, a4, -1
+; RV64IA-TSO-NEXT: addi a4, a4, -1
; RV64IA-TSO-NEXT: sllw a5, a4, a0
; RV64IA-TSO-NEXT: and a1, a1, a4
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -1710,7 +1710,7 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64IA-WMO-NEXT: andi a3, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a4, 16
-; RV64IA-WMO-NEXT: addiw a4, a4, -1
+; RV64IA-WMO-NEXT: addi a4, a4, -1
; RV64IA-WMO-NEXT: sllw a5, a4, a0
; RV64IA-WMO-NEXT: and a1, a1, a4
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -1734,7 +1734,7 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64IA-TSO-NEXT: andi a3, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a4, 16
-; RV64IA-TSO-NEXT: addiw a4, a4, -1
+; RV64IA-TSO-NEXT: addi a4, a4, -1
; RV64IA-TSO-NEXT: sllw a5, a4, a0
; RV64IA-TSO-NEXT: and a1, a1, a4
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -1836,7 +1836,7 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64IA-WMO-NEXT: andi a3, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a4, 16
-; RV64IA-WMO-NEXT: addiw a4, a4, -1
+; RV64IA-WMO-NEXT: addi a4, a4, -1
; RV64IA-WMO-NEXT: sllw a5, a4, a0
; RV64IA-WMO-NEXT: and a1, a1, a4
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -1860,7 +1860,7 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64IA-TSO-NEXT: andi a3, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a4, 16
-; RV64IA-TSO-NEXT: addiw a4, a4, -1
+; RV64IA-TSO-NEXT: addi a4, a4, -1
; RV64IA-TSO-NEXT: sllw a5, a4, a0
; RV64IA-TSO-NEXT: and a1, a1, a4
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -1938,7 +1938,7 @@ define void @cmpxchg_i16_seq_cst_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
; RV64IA-NEXT: andi a3, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a4, 16
-; RV64IA-NEXT: addiw a4, a4, -1
+; RV64IA-NEXT: addi a4, a4, -1
; RV64IA-NEXT: sllw a5, a4, a0
; RV64IA-NEXT: and a1, a1, a4
; RV64IA-NEXT: sllw a1, a1, a0
@@ -2016,7 +2016,7 @@ define void @cmpxchg_i16_seq_cst_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64IA-NEXT: andi a3, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a4, 16
-; RV64IA-NEXT: addiw a4, a4, -1
+; RV64IA-NEXT: addi a4, a4, -1
; RV64IA-NEXT: sllw a5, a4, a0
; RV64IA-NEXT: and a1, a1, a4
; RV64IA-NEXT: sllw a1, a1, a0
@@ -2094,7 +2094,7 @@ define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 %val) nounwind
; RV64IA-NEXT: andi a3, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a4, 16
-; RV64IA-NEXT: addiw a4, a4, -1
+; RV64IA-NEXT: addi a4, a4, -1
; RV64IA-NEXT: sllw a5, a4, a0
; RV64IA-NEXT: and a1, a1, a4
; RV64IA-NEXT: sllw a1, a1, a0
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
index c4f224dcba1b21e..e97a1ea5dfca009 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
@@ -6864,7 +6864,7 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: and a1, a1, a3
; RV64IA-NEXT: sllw a1, a1, a0
@@ -6951,7 +6951,7 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: and a1, a1, a3
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -6972,7 +6972,7 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: and a1, a1, a3
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -7059,7 +7059,7 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: and a1, a1, a3
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -7080,7 +7080,7 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: and a1, a1, a3
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -7167,7 +7167,7 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: and a1, a1, a3
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -7188,7 +7188,7 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: and a1, a1, a3
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -7254,7 +7254,7 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: and a1, a1, a3
; RV64IA-NEXT: sllw a1, a1, a0
@@ -7316,7 +7316,7 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
; RV64IA-NEXT: andi a1, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a2, 16
-; RV64IA-NEXT: addiw a2, a2, -1
+; RV64IA-NEXT: addi a2, a2, -1
; RV64IA-NEXT: sllw a2, a2, a0
; RV64IA-NEXT: not a2, a2
; RV64IA-NEXT: amoand.w a1, a2, (a1)
@@ -7378,7 +7378,7 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
; RV64IA-WMO-NEXT: andi a1, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a2, 16
-; RV64IA-WMO-NEXT: addiw a2, a2, -1
+; RV64IA-WMO-NEXT: addi a2, a2, -1
; RV64IA-WMO-NEXT: sllw a2, a2, a0
; RV64IA-WMO-NEXT: not a2, a2
; RV64IA-WMO-NEXT: amoand.w.aq a1, a2, (a1)
@@ -7390,7 +7390,7 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
; RV64IA-TSO-NEXT: andi a1, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a2, 16
-; RV64IA-TSO-NEXT: addiw a2, a2, -1
+; RV64IA-TSO-NEXT: addi a2, a2, -1
; RV64IA-TSO-NEXT: sllw a2, a2, a0
; RV64IA-TSO-NEXT: not a2, a2
; RV64IA-TSO-NEXT: amoand.w a1, a2, (a1)
@@ -7452,7 +7452,7 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
; RV64IA-WMO-NEXT: andi a1, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a2, 16
-; RV64IA-WMO-NEXT: addiw a2, a2, -1
+; RV64IA-WMO-NEXT: addi a2, a2, -1
; RV64IA-WMO-NEXT: sllw a2, a2, a0
; RV64IA-WMO-NEXT: not a2, a2
; RV64IA-WMO-NEXT: amoand.w.rl a1, a2, (a1)
@@ -7464,7 +7464,7 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
; RV64IA-TSO-NEXT: andi a1, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a2, 16
-; RV64IA-TSO-NEXT: addiw a2, a2, -1
+; RV64IA-TSO-NEXT: addi a2, a2, -1
; RV64IA-TSO-NEXT: sllw a2, a2, a0
; RV64IA-TSO-NEXT: not a2, a2
; RV64IA-TSO-NEXT: amoand.w a1, a2, (a1)
@@ -7526,7 +7526,7 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
; RV64IA-WMO-NEXT: andi a1, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a2, 16
-; RV64IA-WMO-NEXT: addiw a2, a2, -1
+; RV64IA-WMO-NEXT: addi a2, a2, -1
; RV64IA-WMO-NEXT: sllw a2, a2, a0
; RV64IA-WMO-NEXT: not a2, a2
; RV64IA-WMO-NEXT: amoand.w.aqrl a1, a2, (a1)
@@ -7538,7 +7538,7 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
; RV64IA-TSO-NEXT: andi a1, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a2, 16
-; RV64IA-TSO-NEXT: addiw a2, a2, -1
+; RV64IA-TSO-NEXT: addi a2, a2, -1
; RV64IA-TSO-NEXT: sllw a2, a2, a0
; RV64IA-TSO-NEXT: not a2, a2
; RV64IA-TSO-NEXT: amoand.w a1, a2, (a1)
@@ -7600,7 +7600,7 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
; RV64IA-WMO-NEXT: andi a1, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a2, 16
-; RV64IA-WMO-NEXT: addiw a2, a2, -1
+; RV64IA-WMO-NEXT: addi a2, a2, -1
; RV64IA-WMO-NEXT: sllw a2, a2, a0
; RV64IA-WMO-NEXT: not a2, a2
; RV64IA-WMO-NEXT: amoand.w.aqrl a1, a2, (a1)
@@ -7612,7 +7612,7 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-NEXT: andi a1, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a2, 16
-; RV64IA-TSO-NEXT: addiw a2, a2, -1
+; RV64IA-TSO-NEXT: addi a2, a2, -1
; RV64IA-TSO-NEXT: sllw a2, a2, a0
; RV64IA-TSO-NEXT: not a2, a2
; RV64IA-TSO-NEXT: amoand.w a1, a2, (a1)
@@ -7663,7 +7663,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind {
; RV64IA-NEXT: andi a1, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a2, 16
-; RV64IA-NEXT: addiw a2, a2, -1
+; RV64IA-NEXT: addi a2, a2, -1
; RV64IA-NEXT: sllw a2, a2, a0
; RV64IA-NEXT: amoor.w a1, a2, (a1)
; RV64IA-NEXT: srlw a0, a1, a0
@@ -7724,7 +7724,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
; RV64IA-WMO-NEXT: andi a1, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a2, 16
-; RV64IA-WMO-NEXT: addiw a2, a2, -1
+; RV64IA-WMO-NEXT: addi a2, a2, -1
; RV64IA-WMO-NEXT: sllw a2, a2, a0
; RV64IA-WMO-NEXT: amoor.w.aq a1, a2, (a1)
; RV64IA-WMO-NEXT: srlw a0, a1, a0
@@ -7735,7 +7735,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
; RV64IA-TSO-NEXT: andi a1, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a2, 16
-; RV64IA-TSO-NEXT: addiw a2, a2, -1
+; RV64IA-TSO-NEXT: addi a2, a2, -1
; RV64IA-TSO-NEXT: sllw a2, a2, a0
; RV64IA-TSO-NEXT: amoor.w a1, a2, (a1)
; RV64IA-TSO-NEXT: srlw a0, a1, a0
@@ -7796,7 +7796,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
; RV64IA-WMO-NEXT: andi a1, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a2, 16
-; RV64IA-WMO-NEXT: addiw a2, a2, -1
+; RV64IA-WMO-NEXT: addi a2, a2, -1
; RV64IA-WMO-NEXT: sllw a2, a2, a0
; RV64IA-WMO-NEXT: amoor.w.rl a1, a2, (a1)
; RV64IA-WMO-NEXT: srlw a0, a1, a0
@@ -7807,7 +7807,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
; RV64IA-TSO-NEXT: andi a1, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a2, 16
-; RV64IA-TSO-NEXT: addiw a2, a2, -1
+; RV64IA-TSO-NEXT: addi a2, a2, -1
; RV64IA-TSO-NEXT: sllw a2, a2, a0
; RV64IA-TSO-NEXT: amoor.w a1, a2, (a1)
; RV64IA-TSO-NEXT: srlw a0, a1, a0
@@ -7868,7 +7868,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
; RV64IA-WMO-NEXT: andi a1, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a2, 16
-; RV64IA-WMO-NEXT: addiw a2, a2, -1
+; RV64IA-WMO-NEXT: addi a2, a2, -1
; RV64IA-WMO-NEXT: sllw a2, a2, a0
; RV64IA-WMO-NEXT: amoor.w.aqrl a1, a2, (a1)
; RV64IA-WMO-NEXT: srlw a0, a1, a0
@@ -7879,7 +7879,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
; RV64IA-TSO-NEXT: andi a1, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a2, 16
-; RV64IA-TSO-NEXT: addiw a2, a2, -1
+; RV64IA-TSO-NEXT: addi a2, a2, -1
; RV64IA-TSO-NEXT: sllw a2, a2, a0
; RV64IA-TSO-NEXT: amoor.w a1, a2, (a1)
; RV64IA-TSO-NEXT: srlw a0, a1, a0
@@ -7940,7 +7940,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
; RV64IA-WMO-NEXT: andi a1, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a2, 16
-; RV64IA-WMO-NEXT: addiw a2, a2, -1
+; RV64IA-WMO-NEXT: addi a2, a2, -1
; RV64IA-WMO-NEXT: sllw a2, a2, a0
; RV64IA-WMO-NEXT: amoor.w.aqrl a1, a2, (a1)
; RV64IA-WMO-NEXT: srlw a0, a1, a0
@@ -7951,7 +7951,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-NEXT: andi a1, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a2, 16
-; RV64IA-TSO-NEXT: addiw a2, a2, -1
+; RV64IA-TSO-NEXT: addi a2, a2, -1
; RV64IA-TSO-NEXT: sllw a2, a2, a0
; RV64IA-TSO-NEXT: amoor.w a1, a2, (a1)
; RV64IA-TSO-NEXT: srlw a0, a1, a0
@@ -8007,7 +8007,7 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: and a1, a1, a3
; RV64IA-NEXT: sllw a1, a1, a0
@@ -8094,7 +8094,7 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: and a1, a1, a3
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -8115,7 +8115,7 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: and a1, a1, a3
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -8202,7 +8202,7 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: and a1, a1, a3
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -8223,7 +8223,7 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: and a1, a1, a3
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -8310,7 +8310,7 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: and a1, a1, a3
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -8331,7 +8331,7 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: and a1, a1, a3
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -8397,7 +8397,7 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: and a1, a1, a3
; RV64IA-NEXT: sllw a1, a1, a0
@@ -8463,7 +8463,7 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: and a1, a1, a3
; RV64IA-NEXT: sllw a1, a1, a0
@@ -8550,7 +8550,7 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: and a1, a1, a3
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -8571,7 +8571,7 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: and a1, a1, a3
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -8658,7 +8658,7 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: and a1, a1, a3
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -8679,7 +8679,7 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: and a1, a1, a3
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -8766,7 +8766,7 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: and a1, a1, a3
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -8787,7 +8787,7 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: and a1, a1, a3
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -8853,7 +8853,7 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: and a1, a1, a3
; RV64IA-NEXT: sllw a1, a1, a0
@@ -8913,7 +8913,7 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: not a4, a4
; RV64IA-NEXT: and a1, a1, a3
@@ -8982,7 +8982,7 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: not a4, a4
; RV64IA-WMO-NEXT: and a1, a1, a3
@@ -8997,7 +8997,7 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: not a4, a4
; RV64IA-TSO-NEXT: and a1, a1, a3
@@ -9066,7 +9066,7 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: not a4, a4
; RV64IA-WMO-NEXT: and a1, a1, a3
@@ -9081,7 +9081,7 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: not a4, a4
; RV64IA-TSO-NEXT: and a1, a1, a3
@@ -9150,7 +9150,7 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: not a4, a4
; RV64IA-WMO-NEXT: and a1, a1, a3
@@ -9165,7 +9165,7 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: not a4, a4
; RV64IA-TSO-NEXT: and a1, a1, a3
@@ -9234,7 +9234,7 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: not a4, a4
; RV64IA-WMO-NEXT: and a1, a1, a3
@@ -9249,7 +9249,7 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: not a4, a4
; RV64IA-TSO-NEXT: and a1, a1, a3
@@ -9310,7 +9310,7 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: and a1, a1, a3
; RV64IA-NEXT: sllw a1, a1, a0
@@ -9400,7 +9400,7 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: and a1, a1, a3
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -9422,7 +9422,7 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: and a1, a1, a3
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -9512,7 +9512,7 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: and a1, a1, a3
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -9534,7 +9534,7 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: and a1, a1, a3
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -9624,7 +9624,7 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: and a1, a1, a3
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -9646,7 +9646,7 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: and a1, a1, a3
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -9714,7 +9714,7 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: and a1, a1, a3
; RV64IA-NEXT: sllw a1, a1, a0
@@ -10492,7 +10492,7 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: andi a3, a0, 24
; RV64IA-NEXT: lui a4, 16
-; RV64IA-NEXT: addiw a4, a4, -1
+; RV64IA-NEXT: addi a4, a4, -1
; RV64IA-NEXT: sllw a4, a4, a0
; RV64IA-NEXT: slli a1, a1, 48
; RV64IA-NEXT: srai a1, a1, 48
@@ -10673,7 +10673,7 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: andi a3, a0, 24
; RV64IA-WMO-NEXT: lui a4, 16
-; RV64IA-WMO-NEXT: addiw a4, a4, -1
+; RV64IA-WMO-NEXT: addi a4, a4, -1
; RV64IA-WMO-NEXT: sllw a4, a4, a0
; RV64IA-WMO-NEXT: slli a1, a1, 48
; RV64IA-WMO-NEXT: srai a1, a1, 48
@@ -10704,7 +10704,7 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: andi a3, a0, 24
; RV64IA-TSO-NEXT: lui a4, 16
-; RV64IA-TSO-NEXT: addiw a4, a4, -1
+; RV64IA-TSO-NEXT: addi a4, a4, -1
; RV64IA-TSO-NEXT: sllw a4, a4, a0
; RV64IA-TSO-NEXT: slli a1, a1, 48
; RV64IA-TSO-NEXT: srai a1, a1, 48
@@ -10885,7 +10885,7 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: andi a3, a0, 24
; RV64IA-WMO-NEXT: lui a4, 16
-; RV64IA-WMO-NEXT: addiw a4, a4, -1
+; RV64IA-WMO-NEXT: addi a4, a4, -1
; RV64IA-WMO-NEXT: sllw a4, a4, a0
; RV64IA-WMO-NEXT: slli a1, a1, 48
; RV64IA-WMO-NEXT: srai a1, a1, 48
@@ -10916,7 +10916,7 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: andi a3, a0, 24
; RV64IA-TSO-NEXT: lui a4, 16
-; RV64IA-TSO-NEXT: addiw a4, a4, -1
+; RV64IA-TSO-NEXT: addi a4, a4, -1
; RV64IA-TSO-NEXT: sllw a4, a4, a0
; RV64IA-TSO-NEXT: slli a1, a1, 48
; RV64IA-TSO-NEXT: srai a1, a1, 48
@@ -11097,7 +11097,7 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: andi a3, a0, 24
; RV64IA-WMO-NEXT: lui a4, 16
-; RV64IA-WMO-NEXT: addiw a4, a4, -1
+; RV64IA-WMO-NEXT: addi a4, a4, -1
; RV64IA-WMO-NEXT: sllw a4, a4, a0
; RV64IA-WMO-NEXT: slli a1, a1, 48
; RV64IA-WMO-NEXT: srai a1, a1, 48
@@ -11128,7 +11128,7 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: andi a3, a0, 24
; RV64IA-TSO-NEXT: lui a4, 16
-; RV64IA-TSO-NEXT: addiw a4, a4, -1
+; RV64IA-TSO-NEXT: addi a4, a4, -1
; RV64IA-TSO-NEXT: sllw a4, a4, a0
; RV64IA-TSO-NEXT: slli a1, a1, 48
; RV64IA-TSO-NEXT: srai a1, a1, 48
@@ -11278,7 +11278,7 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: andi a3, a0, 24
; RV64IA-NEXT: lui a4, 16
-; RV64IA-NEXT: addiw a4, a4, -1
+; RV64IA-NEXT: addi a4, a4, -1
; RV64IA-NEXT: sllw a4, a4, a0
; RV64IA-NEXT: slli a1, a1, 48
; RV64IA-NEXT: srai a1, a1, 48
@@ -11428,7 +11428,7 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: andi a3, a0, 24
; RV64IA-NEXT: lui a4, 16
-; RV64IA-NEXT: addiw a4, a4, -1
+; RV64IA-NEXT: addi a4, a4, -1
; RV64IA-NEXT: sllw a4, a4, a0
; RV64IA-NEXT: slli a1, a1, 48
; RV64IA-NEXT: srai a1, a1, 48
@@ -11609,7 +11609,7 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: andi a3, a0, 24
; RV64IA-WMO-NEXT: lui a4, 16
-; RV64IA-WMO-NEXT: addiw a4, a4, -1
+; RV64IA-WMO-NEXT: addi a4, a4, -1
; RV64IA-WMO-NEXT: sllw a4, a4, a0
; RV64IA-WMO-NEXT: slli a1, a1, 48
; RV64IA-WMO-NEXT: srai a1, a1, 48
@@ -11640,7 +11640,7 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: andi a3, a0, 24
; RV64IA-TSO-NEXT: lui a4, 16
-; RV64IA-TSO-NEXT: addiw a4, a4, -1
+; RV64IA-TSO-NEXT: addi a4, a4, -1
; RV64IA-TSO-NEXT: sllw a4, a4, a0
; RV64IA-TSO-NEXT: slli a1, a1, 48
; RV64IA-TSO-NEXT: srai a1, a1, 48
@@ -11821,7 +11821,7 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: andi a3, a0, 24
; RV64IA-WMO-NEXT: lui a4, 16
-; RV64IA-WMO-NEXT: addiw a4, a4, -1
+; RV64IA-WMO-NEXT: addi a4, a4, -1
; RV64IA-WMO-NEXT: sllw a4, a4, a0
; RV64IA-WMO-NEXT: slli a1, a1, 48
; RV64IA-WMO-NEXT: srai a1, a1, 48
@@ -11852,7 +11852,7 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: andi a3, a0, 24
; RV64IA-TSO-NEXT: lui a4, 16
-; RV64IA-TSO-NEXT: addiw a4, a4, -1
+; RV64IA-TSO-NEXT: addi a4, a4, -1
; RV64IA-TSO-NEXT: sllw a4, a4, a0
; RV64IA-TSO-NEXT: slli a1, a1, 48
; RV64IA-TSO-NEXT: srai a1, a1, 48
@@ -12033,7 +12033,7 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: andi a3, a0, 24
; RV64IA-WMO-NEXT: lui a4, 16
-; RV64IA-WMO-NEXT: addiw a4, a4, -1
+; RV64IA-WMO-NEXT: addi a4, a4, -1
; RV64IA-WMO-NEXT: sllw a4, a4, a0
; RV64IA-WMO-NEXT: slli a1, a1, 48
; RV64IA-WMO-NEXT: srai a1, a1, 48
@@ -12064,7 +12064,7 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: andi a3, a0, 24
; RV64IA-TSO-NEXT: lui a4, 16
-; RV64IA-TSO-NEXT: addiw a4, a4, -1
+; RV64IA-TSO-NEXT: addi a4, a4, -1
; RV64IA-TSO-NEXT: sllw a4, a4, a0
; RV64IA-TSO-NEXT: slli a1, a1, 48
; RV64IA-TSO-NEXT: srai a1, a1, 48
@@ -12214,7 +12214,7 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: andi a3, a0, 24
; RV64IA-NEXT: lui a4, 16
-; RV64IA-NEXT: addiw a4, a4, -1
+; RV64IA-NEXT: addi a4, a4, -1
; RV64IA-NEXT: sllw a4, a4, a0
; RV64IA-NEXT: slli a1, a1, 48
; RV64IA-NEXT: srai a1, a1, 48
@@ -12361,7 +12361,7 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: and a1, a1, a3
; RV64IA-NEXT: sllw a1, a1, a0
@@ -12528,7 +12528,7 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: and a1, a1, a3
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -12553,7 +12553,7 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: and a1, a1, a3
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -12720,7 +12720,7 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: and a1, a1, a3
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -12745,7 +12745,7 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: and a1, a1, a3
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -12912,7 +12912,7 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: and a1, a1, a3
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -12937,7 +12937,7 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: and a1, a1, a3
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -13079,7 +13079,7 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: and a1, a1, a3
; RV64IA-NEXT: sllw a1, a1, a0
@@ -13221,7 +13221,7 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: and a1, a1, a3
; RV64IA-NEXT: sllw a1, a1, a0
@@ -13388,7 +13388,7 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: and a1, a1, a3
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -13413,7 +13413,7 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: and a1, a1, a3
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -13580,7 +13580,7 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: and a1, a1, a3
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -13605,7 +13605,7 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: and a1, a1, a3
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -13772,7 +13772,7 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-NEXT: andi a2, a0, -4
; RV64IA-WMO-NEXT: slli a0, a0, 3
; RV64IA-WMO-NEXT: lui a3, 16
-; RV64IA-WMO-NEXT: addiw a3, a3, -1
+; RV64IA-WMO-NEXT: addi a3, a3, -1
; RV64IA-WMO-NEXT: sllw a4, a3, a0
; RV64IA-WMO-NEXT: and a1, a1, a3
; RV64IA-WMO-NEXT: sllw a1, a1, a0
@@ -13797,7 +13797,7 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NEXT: andi a2, a0, -4
; RV64IA-TSO-NEXT: slli a0, a0, 3
; RV64IA-TSO-NEXT: lui a3, 16
-; RV64IA-TSO-NEXT: addiw a3, a3, -1
+; RV64IA-TSO-NEXT: addi a3, a3, -1
; RV64IA-TSO-NEXT: sllw a4, a3, a0
; RV64IA-TSO-NEXT: and a1, a1, a3
; RV64IA-TSO-NEXT: sllw a1, a1, a0
@@ -13939,7 +13939,7 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: and a1, a1, a3
; RV64IA-NEXT: sllw a1, a1, a0
diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll
index c08f045ee8abc0a..bd945c865c359d5 100644
--- a/llvm/test/CodeGen/RISCV/atomic-signext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll
@@ -1207,7 +1207,7 @@ define signext i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: and a1, a1, a3
; RV64IA-NEXT: sllw a1, a1, a0
@@ -1281,7 +1281,7 @@ define signext i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: and a1, a1, a3
; RV64IA-NEXT: sllw a1, a1, a0
@@ -1355,7 +1355,7 @@ define signext i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: and a1, a1, a3
; RV64IA-NEXT: sllw a1, a1, a0
@@ -1423,7 +1423,7 @@ define signext i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: not a4, a4
; RV64IA-NEXT: and a1, a1, a3
@@ -1492,7 +1492,7 @@ define signext i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: and a1, a1, a3
; RV64IA-NEXT: sllw a1, a1, a0
@@ -1748,7 +1748,7 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: andi a3, a0, 24
; RV64IA-NEXT: lui a4, 16
-; RV64IA-NEXT: addiw a4, a4, -1
+; RV64IA-NEXT: addi a4, a4, -1
; RV64IA-NEXT: sllw a4, a4, a0
; RV64IA-NEXT: slli a1, a1, 48
; RV64IA-NEXT: srai a1, a1, 48
@@ -1904,7 +1904,7 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: andi a3, a0, 24
; RV64IA-NEXT: lui a4, 16
-; RV64IA-NEXT: addiw a4, a4, -1
+; RV64IA-NEXT: addi a4, a4, -1
; RV64IA-NEXT: sllw a4, a4, a0
; RV64IA-NEXT: slli a1, a1, 48
; RV64IA-NEXT: srai a1, a1, 48
@@ -2057,7 +2057,7 @@ define signext i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: and a1, a1, a3
; RV64IA-NEXT: sllw a1, a1, a0
@@ -2205,7 +2205,7 @@ define signext i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a3, 16
-; RV64IA-NEXT: addiw a3, a3, -1
+; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: and a1, a1, a3
; RV64IA-NEXT: sllw a1, a1, a0
@@ -3969,7 +3969,7 @@ define signext i16 @cmpxchg_i16_monotonic_monotonic_val0(ptr %ptr, i16 signext %
; RV64IA-NEXT: andi a3, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a4, 16
-; RV64IA-NEXT: addiw a4, a4, -1
+; RV64IA-NEXT: addi a4, a4, -1
; RV64IA-NEXT: sllw a5, a4, a0
; RV64IA-NEXT: and a1, a1, a4
; RV64IA-NEXT: sllw a1, a1, a0
@@ -4054,7 +4054,7 @@ define i1 @cmpxchg_i16_monotonic_monotonic_val1(ptr %ptr, i16 signext %cmp, i16
; RV64IA-NEXT: andi a3, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: lui a4, 16
-; RV64IA-NEXT: addiw a4, a4, -1
+; RV64IA-NEXT: addi a4, a4, -1
; RV64IA-NEXT: sllw a5, a4, a0
; RV64IA-NEXT: and a1, a1, a4
; RV64IA-NEXT: sllw a1, a1, a0
diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
index 4d75a74f06ac214..5f15a9c0671021b 100644
--- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
@@ -140,7 +140,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; RV64IA-NEXT: srlw a5, a3, a0
; RV64IA-NEXT: sext.w a6, a3
; RV64IA-NEXT: andi a7, a5, 255
-; RV64IA-NEXT: addiw a5, a5, 1
+; RV64IA-NEXT: addi a5, a5, 1
; RV64IA-NEXT: sltu a7, a7, a1
; RV64IA-NEXT: negw a7, a7
; RV64IA-NEXT: and a5, a7, a5
@@ -304,7 +304,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; RV64IA-NEXT: srlw a6, a4, a0
; RV64IA-NEXT: sext.w a7, a4
; RV64IA-NEXT: and t0, a6, a3
-; RV64IA-NEXT: addiw a6, a6, 1
+; RV64IA-NEXT: addi a6, a6, 1
; RV64IA-NEXT: sltu t0, t0, a1
; RV64IA-NEXT: negw t0, t0
; RV64IA-NEXT: and a6, a6, a3
diff --git a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
index b68f74a1f7c3a72..8a0c4240d161bfb 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
@@ -404,7 +404,7 @@ define i32 @fcvt_wu_bf16_sat(bfloat %a) nounwind {
; CHECK64ZFBFMIN-NEXT: fcvt.wu.s a0, fa5, rtz
; CHECK64ZFBFMIN-NEXT: feq.s a1, fa5, fa5
; CHECK64ZFBFMIN-NEXT: seqz a1, a1
-; CHECK64ZFBFMIN-NEXT: addiw a1, a1, -1
+; CHECK64ZFBFMIN-NEXT: addi a1, a1, -1
; CHECK64ZFBFMIN-NEXT: and a0, a0, a1
; CHECK64ZFBFMIN-NEXT: slli a0, a0, 32
; CHECK64ZFBFMIN-NEXT: srli a0, a0, 32
@@ -420,7 +420,7 @@ define i32 @fcvt_wu_bf16_sat(bfloat %a) nounwind {
; RV64ID-NEXT: fcvt.wu.s a0, fa5, rtz
; RV64ID-NEXT: feq.s a1, fa5, fa5
; RV64ID-NEXT: seqz a1, a1
-; RV64ID-NEXT: addiw a1, a1, -1
+; RV64ID-NEXT: addi a1, a1, -1
; RV64ID-NEXT: and a0, a0, a1
; RV64ID-NEXT: slli a0, a0, 32
; RV64ID-NEXT: srli a0, a0, 32
@@ -1722,7 +1722,7 @@ define zeroext i32 @fcvt_wu_bf16_sat_zext(bfloat %a) nounwind {
; CHECK64ZFBFMIN-NEXT: fcvt.wu.s a0, fa5, rtz
; CHECK64ZFBFMIN-NEXT: feq.s a1, fa5, fa5
; CHECK64ZFBFMIN-NEXT: seqz a1, a1
-; CHECK64ZFBFMIN-NEXT: addiw a1, a1, -1
+; CHECK64ZFBFMIN-NEXT: addi a1, a1, -1
; CHECK64ZFBFMIN-NEXT: and a0, a0, a1
; CHECK64ZFBFMIN-NEXT: slli a0, a0, 32
; CHECK64ZFBFMIN-NEXT: srli a0, a0, 32
@@ -1738,7 +1738,7 @@ define zeroext i32 @fcvt_wu_bf16_sat_zext(bfloat %a) nounwind {
; RV64ID-NEXT: fcvt.wu.s a0, fa5, rtz
; RV64ID-NEXT: feq.s a1, fa5, fa5
; RV64ID-NEXT: seqz a1, a1
-; RV64ID-NEXT: addiw a1, a1, -1
+; RV64ID-NEXT: addi a1, a1, -1
; RV64ID-NEXT: and a0, a0, a1
; RV64ID-NEXT: slli a0, a0, 32
; RV64ID-NEXT: srli a0, a0, 32
diff --git a/llvm/test/CodeGen/RISCV/bfloat.ll b/llvm/test/CodeGen/RISCV/bfloat.ll
index c95d61fd6baab7e..5013f76f9b0b33a 100644
--- a/llvm/test/CodeGen/RISCV/bfloat.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat.ll
@@ -367,7 +367,7 @@ define bfloat @bfloat_add(bfloat %a, bfloat %b) nounwind {
; RV64ID-LP64-NEXT: addi sp, sp, -16
; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: lui a2, 16
-; RV64ID-LP64-NEXT: addiw a2, a2, -1
+; RV64ID-LP64-NEXT: addi a2, a2, -1
; RV64ID-LP64-NEXT: and a0, a0, a2
; RV64ID-LP64-NEXT: and a1, a1, a2
; RV64ID-LP64-NEXT: slli a1, a1, 16
@@ -409,7 +409,7 @@ define bfloat @bfloat_add(bfloat %a, bfloat %b) nounwind {
; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: lui a1, 16
-; RV64ID-LP64D-NEXT: addiw a1, a1, -1
+; RV64ID-LP64D-NEXT: addi a1, a1, -1
; RV64ID-LP64D-NEXT: and a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.x.w a2, fa1
; RV64ID-LP64D-NEXT: and a1, a2, a1
@@ -605,7 +605,7 @@ define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind {
; RV64ID-LP64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT: mv s0, a0
; RV64ID-LP64-NEXT: lui a0, 16
-; RV64ID-LP64-NEXT: addiw a0, a0, -1
+; RV64ID-LP64-NEXT: addi a0, a0, -1
; RV64ID-LP64-NEXT: and a1, a1, a0
; RV64ID-LP64-NEXT: and a0, a2, a0
; RV64ID-LP64-NEXT: slli a0, a0, 16
@@ -652,7 +652,7 @@ define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind {
; RV64ID-LP64D-NEXT: mv s0, a0
; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT: lui a1, 16
-; RV64ID-LP64D-NEXT: addiw a1, a1, -1
+; RV64ID-LP64D-NEXT: addi a1, a1, -1
; RV64ID-LP64D-NEXT: and a0, a0, a1
; RV64ID-LP64D-NEXT: fmv.x.w a2, fa1
; RV64ID-LP64D-NEXT: and a1, a2, a1
diff --git a/llvm/test/CodeGen/RISCV/bittest.ll b/llvm/test/CodeGen/RISCV/bittest.ll
index d281c942773548b..f560a112dd92b47 100644
--- a/llvm/test/CodeGen/RISCV/bittest.ll
+++ b/llvm/test/CodeGen/RISCV/bittest.ll
@@ -266,7 +266,7 @@ define i1 @bittest_constant_by_var_shr_i32(i32 signext %b) nounwind {
; RV64I-LABEL: bittest_constant_by_var_shr_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a1, 301408
-; RV64I-NEXT: addiw a1, a1, 722
+; RV64I-NEXT: addi a1, a1, 722
; RV64I-NEXT: srlw a0, a1, a0
; RV64I-NEXT: andi a0, a0, 1
; RV64I-NEXT: ret
@@ -296,7 +296,7 @@ define i1 @bittest_constant_by_var_shr_i32(i32 signext %b) nounwind {
; RV64XTHEADBS-LABEL: bittest_constant_by_var_shr_i32:
; RV64XTHEADBS: # %bb.0:
; RV64XTHEADBS-NEXT: lui a1, 301408
-; RV64XTHEADBS-NEXT: addiw a1, a1, 722
+; RV64XTHEADBS-NEXT: addi a1, a1, 722
; RV64XTHEADBS-NEXT: srlw a0, a1, a0
; RV64XTHEADBS-NEXT: andi a0, a0, 1
; RV64XTHEADBS-NEXT: ret
@@ -319,7 +319,7 @@ define i1 @bittest_constant_by_var_shl_i32(i32 signext %b) nounwind {
; RV64I-LABEL: bittest_constant_by_var_shl_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a1, 301408
-; RV64I-NEXT: addiw a1, a1, 722
+; RV64I-NEXT: addi a1, a1, 722
; RV64I-NEXT: srlw a0, a1, a0
; RV64I-NEXT: andi a0, a0, 1
; RV64I-NEXT: ret
@@ -349,7 +349,7 @@ define i1 @bittest_constant_by_var_shl_i32(i32 signext %b) nounwind {
; RV64XTHEADBS-LABEL: bittest_constant_by_var_shl_i32:
; RV64XTHEADBS: # %bb.0:
; RV64XTHEADBS-NEXT: lui a1, 301408
-; RV64XTHEADBS-NEXT: addiw a1, a1, 722
+; RV64XTHEADBS-NEXT: addi a1, a1, 722
; RV64XTHEADBS-NEXT: srlw a0, a1, a0
; RV64XTHEADBS-NEXT: andi a0, a0, 1
; RV64XTHEADBS-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
index d64fcbe57a85145..864dd8e529060c3 100644
--- a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
@@ -141,7 +141,7 @@ define i64 @test_bswap_i64(i64 %a) nounwind {
; RV64I-NEXT: or a1, a3, a1
; RV64I-NEXT: and a4, a0, a4
; RV64I-NEXT: slli a4, a4, 24
-; RV64I-NEXT: srliw a3, a0, 24
+; RV64I-NEXT: srli a3, a0, 24
; RV64I-NEXT: slli a3, a3, 32
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: and a2, a0, a2
@@ -497,7 +497,7 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
; RV64ZBB-NEXT: and a1, a1, a2
; RV64ZBB-NEXT: srli a0, a0, 28
; RV64ZBB-NEXT: lui a2, 986895
-; RV64ZBB-NEXT: addiw a2, a2, 240
+; RV64ZBB-NEXT: addi a2, a2, 240
; RV64ZBB-NEXT: and a0, a0, a2
; RV64ZBB-NEXT: sext.w a0, a0
; RV64ZBB-NEXT: or a0, a1, a0
@@ -613,7 +613,7 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
; RV64I-NEXT: or a1, a3, a1
; RV64I-NEXT: and a4, a0, a4
; RV64I-NEXT: slli a4, a4, 24
-; RV64I-NEXT: srliw a3, a0, 24
+; RV64I-NEXT: srli a3, a0, 24
; RV64I-NEXT: slli a3, a3, 32
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: and a2, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-half.ll b/llvm/test/CodeGen/RISCV/calling-conv-half.ll
index fc53f70186b763d..6587f0c8c5af7bf 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-half.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-half.ll
@@ -396,7 +396,7 @@ define i32 @caller_half_on_stack() nounwind {
; RV64IF-NEXT: addi sp, sp, -16
; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IF-NEXT: lui a0, 1048565
-; RV64IF-NEXT: addiw t0, a0, -1792
+; RV64IF-NEXT: addi t0, a0, -1792
; RV64IF-NEXT: li a0, 1
; RV64IF-NEXT: li a1, 2
; RV64IF-NEXT: li a2, 3
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index 699bb44645711df..b54bb55432437f2 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -58,7 +58,7 @@ define i8 @test_cttz_i8(i8 %a) nounwind {
; RV64NOZBB-NEXT: andi a1, a0, 255
; RV64NOZBB-NEXT: beqz a1, .LBB0_2
; RV64NOZBB-NEXT: # %bb.1: # %cond.false
-; RV64NOZBB-NEXT: addiw a1, a0, -1
+; RV64NOZBB-NEXT: addi a1, a0, -1
; RV64NOZBB-NEXT: not a0, a0
; RV64NOZBB-NEXT: and a0, a0, a1
; RV64NOZBB-NEXT: srli a1, a0, 1
@@ -308,7 +308,7 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
; RV64M-NEXT: negw a1, a0
; RV64M-NEXT: and a0, a0, a1
; RV64M-NEXT: lui a1, 30667
-; RV64M-NEXT: addiw a1, a1, 1329
+; RV64M-NEXT: addi a1, a1, 1329
; RV64M-NEXT: mul a0, a0, a1
; RV64M-NEXT: srliw a0, a0, 27
; RV64M-NEXT: lui a1, %hi(.LCPI2_0)
@@ -567,7 +567,7 @@ define i8 @test_cttz_i8_zero_undef(i8 %a) nounwind {
;
; RV64NOZBB-LABEL: test_cttz_i8_zero_undef:
; RV64NOZBB: # %bb.0:
-; RV64NOZBB-NEXT: addiw a1, a0, -1
+; RV64NOZBB-NEXT: addi a1, a0, -1
; RV64NOZBB-NEXT: not a0, a0
; RV64NOZBB-NEXT: and a0, a0, a1
; RV64NOZBB-NEXT: srli a1, a0, 1
@@ -753,7 +753,7 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind {
; RV64M-NEXT: negw a1, a0
; RV64M-NEXT: and a0, a0, a1
; RV64M-NEXT: lui a1, 30667
-; RV64M-NEXT: addiw a1, a1, 1329
+; RV64M-NEXT: addi a1, a1, 1329
; RV64M-NEXT: mul a0, a0, a1
; RV64M-NEXT: srliw a0, a0, 27
; RV64M-NEXT: lui a1, %hi(.LCPI6_0)
@@ -1315,10 +1315,10 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
; RV64M-NEXT: srli a1, a0, 4
; RV64M-NEXT: add a0, a0, a1
; RV64M-NEXT: lui a1, 61681
-; RV64M-NEXT: addiw a1, a1, -241
+; RV64M-NEXT: addi a1, a1, -241
; RV64M-NEXT: and a0, a0, a1
; RV64M-NEXT: lui a1, 4112
-; RV64M-NEXT: addiw a1, a1, 257
+; RV64M-NEXT: addi a1, a1, 257
; RV64M-NEXT: mul a0, a0, a1
; RV64M-NEXT: srliw a0, a0, 24
; RV64M-NEXT: ret
@@ -1969,10 +1969,10 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
; RV64M-NEXT: srli a1, a0, 4
; RV64M-NEXT: add a0, a0, a1
; RV64M-NEXT: lui a1, 61681
-; RV64M-NEXT: addiw a1, a1, -241
+; RV64M-NEXT: addi a1, a1, -241
; RV64M-NEXT: and a0, a0, a1
; RV64M-NEXT: lui a1, 4112
-; RV64M-NEXT: addiw a1, a1, 257
+; RV64M-NEXT: addi a1, a1, 257
; RV64M-NEXT: mul a0, a0, a1
; RV64M-NEXT: srliw a0, a0, 24
; RV64M-NEXT: ret
@@ -2558,10 +2558,10 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
; RV64M-NEXT: srli a1, a0, 4
; RV64M-NEXT: add a0, a0, a1
; RV64M-NEXT: lui a1, 61681
-; RV64M-NEXT: addiw a1, a1, -241
+; RV64M-NEXT: addi a1, a1, -241
; RV64M-NEXT: and a0, a0, a1
; RV64M-NEXT: lui a1, 4112
-; RV64M-NEXT: addiw a1, a1, 257
+; RV64M-NEXT: addi a1, a1, 257
; RV64M-NEXT: mul a0, a0, a1
; RV64M-NEXT: srliw a0, a0, 24
; RV64M-NEXT: ret
@@ -3072,7 +3072,7 @@ define i32 @test_parity_i32(i32 %a) {
; RV64NOZBB: # %bb.0:
; RV64NOZBB-NEXT: slli a1, a0, 32
; RV64NOZBB-NEXT: srli a1, a1, 32
-; RV64NOZBB-NEXT: srliw a0, a0, 16
+; RV64NOZBB-NEXT: srli a0, a0, 16
; RV64NOZBB-NEXT: xor a0, a1, a0
; RV64NOZBB-NEXT: srli a1, a0, 8
; RV64NOZBB-NEXT: xor a0, a0, a1
@@ -3115,7 +3115,7 @@ define i32 @test_parity_i32(i32 %a) {
; RV64XTHEADBB-LABEL: test_parity_i32:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: th.extu a1, a0, 31, 0
-; RV64XTHEADBB-NEXT: srliw a0, a0, 16
+; RV64XTHEADBB-NEXT: srli a0, a0, 16
; RV64XTHEADBB-NEXT: xor a0, a1, a0
; RV64XTHEADBB-NEXT: srli a1, a0, 8
; RV64XTHEADBB-NEXT: xor a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
index be1a834e56c2297..02072b3e4e5ca82 100644
--- a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
+++ b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
@@ -98,7 +98,7 @@ define signext i32 @ctz_dereferencing_pointer(i64* %b) nounwind {
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: seqz a1, s0
-; RV64I-NEXT: addiw a1, a1, -1
+; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: andi a0, a0, 63
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -177,7 +177,7 @@ define i64 @ctz_dereferencing_pointer_zext(i32* %b) nounwind {
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: seqz a1, s0
-; RV64I-NEXT: addiw a1, a1, -1
+; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: andi a0, a0, 31
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -252,7 +252,7 @@ define signext i32 @ctz1(i32 signext %x) nounwind {
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: seqz a1, s0
-; RV64I-NEXT: addiw a1, a1, -1
+; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: andi a0, a0, 31
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -325,7 +325,7 @@ define signext i32 @ctz1_flipped(i32 signext %x) nounwind {
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: seqz a1, s0
-; RV64I-NEXT: addiw a1, a1, -1
+; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: andi a0, a0, 31
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -561,7 +561,7 @@ define signext i32 @ctz4(i64 %b) nounwind {
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: seqz a1, s0
-; RV64I-NEXT: addiw a1, a1, -1
+; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: andi a0, a0, 63
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -804,7 +804,7 @@ define signext i32 @ctz5(i32 signext %x) nounwind {
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: seqz a1, s0
-; RV64I-NEXT: addiw a1, a1, -1
+; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: andi a0, a0, 31
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -877,7 +877,7 @@ define signext i32 @ctz6(i32 signext %x) nounwind {
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: seqz a1, s0
-; RV64I-NEXT: addiw a1, a1, -1
+; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: andi a0, a0, 31
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -956,7 +956,7 @@ define signext i32 @globalVar() nounwind {
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: seqz a1, s0
-; RV64I-NEXT: addiw a1, a1, -1
+; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: andi a0, a0, 31
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/div-by-constant.ll b/llvm/test/CodeGen/RISCV/div-by-constant.ll
index 4672b9a7d9abb51..bf19bbd8b131454 100644
--- a/llvm/test/CodeGen/RISCV/div-by-constant.ll
+++ b/llvm/test/CodeGen/RISCV/div-by-constant.ll
@@ -24,7 +24,7 @@ define i32 @udiv_constant_no_add(i32 %a) nounwind {
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: lui a1, 838861
-; RV64-NEXT: addiw a1, a1, -819
+; RV64-NEXT: addi a1, a1, -819
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: mulhu a0, a0, a1
; RV64-NEXT: srli a0, a0, 34
@@ -50,7 +50,7 @@ define i32 @udiv_constant_add(i32 %a) nounwind {
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 32
; RV64IM-NEXT: lui a2, 149797
-; RV64IM-NEXT: addiw a2, a2, -1755
+; RV64IM-NEXT: addi a2, a2, -1755
; RV64IM-NEXT: slli a2, a2, 32
; RV64IM-NEXT: mulhu a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 32
diff --git a/llvm/test/CodeGen/RISCV/div-pow2.ll b/llvm/test/CodeGen/RISCV/div-pow2.ll
index 254e675b4ed8b5a..279fb6fd61d5555 100644
--- a/llvm/test/CodeGen/RISCV/div-pow2.ll
+++ b/llvm/test/CodeGen/RISCV/div-pow2.ll
@@ -14,7 +14,7 @@ define i32 @sdiv32_pow2_2(i32 %a) {
;
; RV64I-LABEL: sdiv32_pow2_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: srliw a1, a0, 31
+; RV64I-NEXT: srli a1, a0, 31
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 1
; RV64I-NEXT: ret
@@ -34,7 +34,7 @@ define i32 @sdiv32_pow2_negative_2(i32 %a) {
;
; RV64I-LABEL: sdiv32_pow2_negative_2:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: srliw a1, a0, 31
+; RV64I-NEXT: srli a1, a0, 31
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 1
; RV64I-NEXT: neg a0, a0
@@ -55,8 +55,8 @@ define i32 @sdiv32_pow2_2048(i32 %a) {
;
; RV64I-LABEL: sdiv32_pow2_2048:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: sraiw a1, a0, 31
-; RV64I-NEXT: srliw a1, a1, 21
+; RV64I-NEXT: srai a1, a0, 31
+; RV64I-NEXT: srli a1, a1, 21
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 11
; RV64I-NEXT: ret
@@ -77,8 +77,8 @@ define i32 @sdiv32_pow2_negative_2048(i32 %a) {
;
; RV64I-LABEL: sdiv32_pow2_negative_2048:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: sraiw a1, a0, 31
-; RV64I-NEXT: srliw a1, a1, 21
+; RV64I-NEXT: srai a1, a0, 31
+; RV64I-NEXT: srli a1, a1, 21
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 11
; RV64I-NEXT: neg a0, a0
@@ -99,8 +99,8 @@ define i32 @sdiv32_pow2_4096(i32 %a) {
;
; RV64I-LABEL: sdiv32_pow2_4096:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: sraiw a1, a0, 31
-; RV64I-NEXT: srliw a1, a1, 20
+; RV64I-NEXT: srai a1, a0, 31
+; RV64I-NEXT: srli a1, a1, 20
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 12
; RV64I-NEXT: ret
@@ -121,8 +121,8 @@ define i32 @sdiv32_pow2_negative_4096(i32 %a) {
;
; RV64I-LABEL: sdiv32_pow2_negative_4096:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: sraiw a1, a0, 31
-; RV64I-NEXT: srliw a1, a1, 20
+; RV64I-NEXT: srai a1, a0, 31
+; RV64I-NEXT: srli a1, a1, 20
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 12
; RV64I-NEXT: neg a0, a0
@@ -143,8 +143,8 @@ define i32 @sdiv32_pow2_65536(i32 %a) {
;
; RV64I-LABEL: sdiv32_pow2_65536:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: sraiw a1, a0, 31
-; RV64I-NEXT: srliw a1, a1, 16
+; RV64I-NEXT: srai a1, a0, 31
+; RV64I-NEXT: srli a1, a1, 16
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 16
; RV64I-NEXT: ret
@@ -165,8 +165,8 @@ define i32 @sdiv32_pow2_negative_65536(i32 %a) {
;
; RV64I-LABEL: sdiv32_pow2_negative_65536:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: sraiw a1, a0, 31
-; RV64I-NEXT: srliw a1, a1, 16
+; RV64I-NEXT: srai a1, a0, 31
+; RV64I-NEXT: srli a1, a1, 16
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 16
; RV64I-NEXT: neg a0, a0
diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll
index 2bde0349ccc7196..766742f40d02af4 100644
--- a/llvm/test/CodeGen/RISCV/div.ll
+++ b/llvm/test/CodeGen/RISCV/div.ll
@@ -69,7 +69,7 @@ define i32 @udiv_constant(i32 %a) nounwind {
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a0, a0, 32
; RV64IM-NEXT: lui a1, 838861
-; RV64IM-NEXT: addiw a1, a1, -819
+; RV64IM-NEXT: addi a1, a1, -819
; RV64IM-NEXT: slli a1, a1, 32
; RV64IM-NEXT: mulhu a0, a0, a1
; RV64IM-NEXT: srli a0, a0, 34
@@ -452,7 +452,7 @@ define i16 @udiv16(i16 %a, i16 %b) nounwind {
; RV64IM-LABEL: udiv16:
; RV64IM: # %bb.0:
; RV64IM-NEXT: lui a2, 16
-; RV64IM-NEXT: addiw a2, a2, -1
+; RV64IM-NEXT: addi a2, a2, -1
; RV64IM-NEXT: and a1, a1, a2
; RV64IM-NEXT: and a0, a0, a2
; RV64IM-NEXT: divuw a0, a0, a1
@@ -667,16 +667,16 @@ define i32 @sdiv_pow2(i32 %a) nounwind {
;
; RV64I-LABEL: sdiv_pow2:
; RV64I: # %bb.0:
-; RV64I-NEXT: sraiw a1, a0, 31
-; RV64I-NEXT: srliw a1, a1, 29
+; RV64I-NEXT: srai a1, a0, 31
+; RV64I-NEXT: srli a1, a1, 29
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 3
; RV64I-NEXT: ret
;
; RV64IM-LABEL: sdiv_pow2:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: sraiw a1, a0, 31
-; RV64IM-NEXT: srliw a1, a1, 29
+; RV64IM-NEXT: srai a1, a0, 31
+; RV64IM-NEXT: srli a1, a1, 29
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: sraiw a0, a0, 3
; RV64IM-NEXT: ret
@@ -703,16 +703,16 @@ define i32 @sdiv_pow2_2(i32 %a) nounwind {
;
; RV64I-LABEL: sdiv_pow2_2:
; RV64I: # %bb.0:
-; RV64I-NEXT: sraiw a1, a0, 31
-; RV64I-NEXT: srliw a1, a1, 16
+; RV64I-NEXT: srai a1, a0, 31
+; RV64I-NEXT: srli a1, a1, 16
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 16
; RV64I-NEXT: ret
;
; RV64IM-LABEL: sdiv_pow2_2:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: sraiw a1, a0, 31
-; RV64IM-NEXT: srliw a1, a1, 16
+; RV64IM-NEXT: srai a1, a0, 31
+; RV64IM-NEXT: srli a1, a1, 16
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: sraiw a0, a0, 16
; RV64IM-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll
index 1ecb2c24bb6f228..39ac963051b5b0b 100644
--- a/llvm/test/CodeGen/RISCV/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert.ll
@@ -394,7 +394,7 @@ define i32 @fcvt_wu_d_sat(double %a) nounwind {
; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rtz
; RV64IFD-NEXT: feq.d a1, fa0, fa0
; RV64IFD-NEXT: seqz a1, a1
-; RV64IFD-NEXT: addiw a1, a1, -1
+; RV64IFD-NEXT: addi a1, a1, -1
; RV64IFD-NEXT: and a0, a0, a1
; RV64IFD-NEXT: slli a0, a0, 32
; RV64IFD-NEXT: srli a0, a0, 32
@@ -420,7 +420,7 @@ define i32 @fcvt_wu_d_sat(double %a) nounwind {
; RV64IZFINXZDINX-NEXT: fcvt.wu.d a1, a0, rtz
; RV64IZFINXZDINX-NEXT: feq.d a0, a0, a0
; RV64IZFINXZDINX-NEXT: seqz a0, a0
-; RV64IZFINXZDINX-NEXT: addiw a0, a0, -1
+; RV64IZFINXZDINX-NEXT: addi a0, a0, -1
; RV64IZFINXZDINX-NEXT: and a0, a1, a0
; RV64IZFINXZDINX-NEXT: slli a0, a0, 32
; RV64IZFINXZDINX-NEXT: srli a0, a0, 32
@@ -1891,20 +1891,20 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
; RV64I-NEXT: lui s1, 1048568
; RV64I-NEXT: .LBB26_2: # %start
; RV64I-NEXT: lui a0, 4152
-; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: slli a1, a0, 38
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __gtdf2@plt
; RV64I-NEXT: blez a0, .LBB26_4
; RV64I-NEXT: # %bb.3: # %start
; RV64I-NEXT: lui s1, 8
-; RV64I-NEXT: addiw s1, s1, -1
+; RV64I-NEXT: addi s1, s1, -1
; RV64I-NEXT: .LBB26_4: # %start
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unorddf2@plt
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a0, a0, 48
@@ -2074,7 +2074,7 @@ define zeroext i16 @fcvt_wu_s_sat_i16(double %a) nounwind {
; RV64I-NEXT: call __fixunsdfdi@plt
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: lui a0, 8312
-; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: slli a1, a0, 37
; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __gtdf2@plt
@@ -2298,7 +2298,7 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind {
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unorddf2@plt
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a0, a0, 56
@@ -2509,7 +2509,7 @@ define zeroext i32 @fcvt_wu_d_sat_zext(double %a) nounwind {
; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rtz
; RV64IFD-NEXT: feq.d a1, fa0, fa0
; RV64IFD-NEXT: seqz a1, a1
-; RV64IFD-NEXT: addiw a1, a1, -1
+; RV64IFD-NEXT: addi a1, a1, -1
; RV64IFD-NEXT: and a0, a0, a1
; RV64IFD-NEXT: slli a0, a0, 32
; RV64IFD-NEXT: srli a0, a0, 32
@@ -2535,7 +2535,7 @@ define zeroext i32 @fcvt_wu_d_sat_zext(double %a) nounwind {
; RV64IZFINXZDINX-NEXT: fcvt.wu.d a1, a0, rtz
; RV64IZFINXZDINX-NEXT: feq.d a0, a0, a0
; RV64IZFINXZDINX-NEXT: seqz a0, a0
-; RV64IZFINXZDINX-NEXT: addiw a0, a0, -1
+; RV64IZFINXZDINX-NEXT: addi a0, a0, -1
; RV64IZFINXZDINX-NEXT: and a0, a1, a0
; RV64IZFINXZDINX-NEXT: slli a0, a0, 32
; RV64IZFINXZDINX-NEXT: srli a0, a0, 32
@@ -2735,13 +2735,13 @@ define signext i32 @fcvt_w_d_sat_sext(double %a) nounwind {
; RV64I-NEXT: call __gtdf2@plt
; RV64I-NEXT: blez a0, .LBB34_4
; RV64I-NEXT: # %bb.3: # %start
-; RV64I-NEXT: addiw s1, s3, -1
+; RV64I-NEXT: addi s1, s3, -1
; RV64I-NEXT: .LBB34_4: # %start
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unorddf2@plt
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
index 066b6fe9c534885..83a4f63add337fe 100644
--- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
+++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
@@ -69,7 +69,7 @@ define void @_Z3foov() {
; CHECK-NEXT: vl2r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vle16.v v16, (a0)
; CHECK-NEXT: lui a0, 1048572
-; CHECK-NEXT: addiw a0, a0, 928
+; CHECK-NEXT: addi a0, a0, 928
; CHECK-NEXT: vmsbc.vx v0, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu
; CHECK-NEXT: csrr a0, vlenb
diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll
index 7224f5b79b7a112..235979b122215a6 100644
--- a/llvm/test/CodeGen/RISCV/float-convert.ll
+++ b/llvm/test/CodeGen/RISCV/float-convert.ll
@@ -243,7 +243,7 @@ define i32 @fcvt_wu_s_sat(float %a) nounwind {
; RV64IF-NEXT: fcvt.wu.s a0, fa0, rtz
; RV64IF-NEXT: feq.s a1, fa0, fa0
; RV64IF-NEXT: seqz a1, a1
-; RV64IF-NEXT: addiw a1, a1, -1
+; RV64IF-NEXT: addi a1, a1, -1
; RV64IF-NEXT: and a0, a0, a1
; RV64IF-NEXT: slli a0, a0, 32
; RV64IF-NEXT: srli a0, a0, 32
@@ -263,7 +263,7 @@ define i32 @fcvt_wu_s_sat(float %a) nounwind {
; RV64IZFINX-NEXT: fcvt.wu.s a1, a0, rtz
; RV64IZFINX-NEXT: feq.s a0, a0, a0
; RV64IZFINX-NEXT: seqz a0, a0
-; RV64IZFINX-NEXT: addiw a0, a0, -1
+; RV64IZFINX-NEXT: addi a0, a0, -1
; RV64IZFINX-NEXT: and a0, a1, a0
; RV64IZFINX-NEXT: slli a0, a0, 32
; RV64IZFINX-NEXT: srli a0, a0, 32
@@ -1528,13 +1528,13 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
; RV64I-NEXT: blez a0, .LBB24_4
; RV64I-NEXT: # %bb.3: # %start
; RV64I-NEXT: lui s1, 8
-; RV64I-NEXT: addiw s1, s1, -1
+; RV64I-NEXT: addi s1, s1, -1
; RV64I-NEXT: .LBB24_4: # %start
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a0, a0, 48
@@ -1874,7 +1874,7 @@ define signext i8 @fcvt_w_s_sat_i8(float %a) nounwind {
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a0, a0, 56
@@ -2057,7 +2057,7 @@ define zeroext i32 @fcvt_wu_s_sat_zext(float %a) nounwind {
; RV64IF-NEXT: fcvt.wu.s a0, fa0, rtz
; RV64IF-NEXT: feq.s a1, fa0, fa0
; RV64IF-NEXT: seqz a1, a1
-; RV64IF-NEXT: addiw a1, a1, -1
+; RV64IF-NEXT: addi a1, a1, -1
; RV64IF-NEXT: and a0, a0, a1
; RV64IF-NEXT: slli a0, a0, 32
; RV64IF-NEXT: srli a0, a0, 32
@@ -2077,7 +2077,7 @@ define zeroext i32 @fcvt_wu_s_sat_zext(float %a) nounwind {
; RV64IZFINX-NEXT: fcvt.wu.s a1, a0, rtz
; RV64IZFINX-NEXT: feq.s a0, a0, a0
; RV64IZFINX-NEXT: seqz a0, a0
-; RV64IZFINX-NEXT: addiw a0, a0, -1
+; RV64IZFINX-NEXT: addi a0, a0, -1
; RV64IZFINX-NEXT: and a0, a1, a0
; RV64IZFINX-NEXT: slli a0, a0, 32
; RV64IZFINX-NEXT: srli a0, a0, 32
@@ -2238,13 +2238,13 @@ define signext i32 @fcvt_w_s_sat_sext(float %a) nounwind {
; RV64I-NEXT: call __gtsf2@plt
; RV64I-NEXT: blez a0, .LBB32_4
; RV64I-NEXT: # %bb.3: # %start
-; RV64I-NEXT: addiw s1, s3, -1
+; RV64I-NEXT: addi s1, s3, -1
; RV64I-NEXT: .LBB32_4: # %start
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
index 434c9f05bd16f7c..5a6e0baf752d079 100644
--- a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
+++ b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
@@ -325,7 +325,7 @@ define dso_local void @inc_g_i32() nounwind {
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: lui a0, %hi(g_4_i32)
; RV64I-NEXT: lw a1, %lo(g_4_i32)(a0)
-; RV64I-NEXT: addiw a1, a1, 1
+; RV64I-NEXT: addi a1, a1, 1
; RV64I-NEXT: sw a1, %lo(g_4_i32)(a0)
; RV64I-NEXT: ret
;
@@ -334,7 +334,7 @@ define dso_local void @inc_g_i32() nounwind {
; RV64I-MEDIUM-NEXT: .Lpcrel_hi8:
; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(g_4_i32)
; RV64I-MEDIUM-NEXT: lw a1, %pcrel_lo(.Lpcrel_hi8)(a0)
-; RV64I-MEDIUM-NEXT: addiw a1, a1, 1
+; RV64I-MEDIUM-NEXT: addi a1, a1, 1
; RV64I-MEDIUM-NEXT: sw a1, %pcrel_lo(.Lpcrel_hi8)(a0)
; RV64I-MEDIUM-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll
index 4dadd6528300198..2d3f40e15fe4324 100644
--- a/llvm/test/CodeGen/RISCV/half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/half-convert.ll
@@ -1703,7 +1703,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz
; RV64IZFH-NEXT: feq.h a1, fa0, fa0
; RV64IZFH-NEXT: seqz a1, a1
-; RV64IZFH-NEXT: addiw a1, a1, -1
+; RV64IZFH-NEXT: addi a1, a1, -1
; RV64IZFH-NEXT: and a0, a0, a1
; RV64IZFH-NEXT: slli a0, a0, 32
; RV64IZFH-NEXT: srli a0, a0, 32
@@ -1723,7 +1723,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
; RV64IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz
; RV64IDZFH-NEXT: feq.h a1, fa0, fa0
; RV64IDZFH-NEXT: seqz a1, a1
-; RV64IDZFH-NEXT: addiw a1, a1, -1
+; RV64IDZFH-NEXT: addi a1, a1, -1
; RV64IDZFH-NEXT: and a0, a0, a1
; RV64IDZFH-NEXT: slli a0, a0, 32
; RV64IDZFH-NEXT: srli a0, a0, 32
@@ -1743,7 +1743,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz
; RV64IZHINX-NEXT: feq.h a0, a0, a0
; RV64IZHINX-NEXT: seqz a0, a0
-; RV64IZHINX-NEXT: addiw a0, a0, -1
+; RV64IZHINX-NEXT: addi a0, a0, -1
; RV64IZHINX-NEXT: and a0, a1, a0
; RV64IZHINX-NEXT: slli a0, a0, 32
; RV64IZHINX-NEXT: srli a0, a0, 32
@@ -1763,7 +1763,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
; RV64IZDINXZHINX-NEXT: fcvt.wu.h a1, a0, rtz
; RV64IZDINXZHINX-NEXT: feq.h a0, a0, a0
; RV64IZDINXZHINX-NEXT: seqz a0, a0
-; RV64IZDINXZHINX-NEXT: addiw a0, a0, -1
+; RV64IZDINXZHINX-NEXT: addi a0, a0, -1
; RV64IZDINXZHINX-NEXT: and a0, a1, a0
; RV64IZDINXZHINX-NEXT: slli a0, a0, 32
; RV64IZDINXZHINX-NEXT: srli a0, a0, 32
@@ -1863,7 +1863,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
; RV64ID-LP64-NEXT: fcvt.wu.s a0, fa5, rtz
; RV64ID-LP64-NEXT: feq.s a1, fa5, fa5
; RV64ID-LP64-NEXT: seqz a1, a1
-; RV64ID-LP64-NEXT: addiw a1, a1, -1
+; RV64ID-LP64-NEXT: addi a1, a1, -1
; RV64ID-LP64-NEXT: and a0, a0, a1
; RV64ID-LP64-NEXT: slli a0, a0, 32
; RV64ID-LP64-NEXT: srli a0, a0, 32
@@ -1893,7 +1893,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
; RV64ID-NEXT: fcvt.wu.s a0, fa0, rtz
; RV64ID-NEXT: feq.s a1, fa0, fa0
; RV64ID-NEXT: seqz a1, a1
-; RV64ID-NEXT: addiw a1, a1, -1
+; RV64ID-NEXT: addi a1, a1, -1
; RV64ID-NEXT: and a0, a0, a1
; RV64ID-NEXT: slli a0, a0, 32
; RV64ID-NEXT: srli a0, a0, 32
@@ -1917,7 +1917,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
; CHECK64-IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz
; CHECK64-IZFHMIN-NEXT: feq.s a1, fa5, fa5
; CHECK64-IZFHMIN-NEXT: seqz a1, a1
-; CHECK64-IZFHMIN-NEXT: addiw a1, a1, -1
+; CHECK64-IZFHMIN-NEXT: addi a1, a1, -1
; CHECK64-IZFHMIN-NEXT: and a0, a0, a1
; CHECK64-IZFHMIN-NEXT: slli a0, a0, 32
; CHECK64-IZFHMIN-NEXT: srli a0, a0, 32
@@ -1939,7 +1939,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
; CHECK64-IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz
; CHECK64-IZHINXMIN-NEXT: feq.s a0, a0, a0
; CHECK64-IZHINXMIN-NEXT: seqz a0, a0
-; CHECK64-IZHINXMIN-NEXT: addiw a0, a0, -1
+; CHECK64-IZHINXMIN-NEXT: addi a0, a0, -1
; CHECK64-IZHINXMIN-NEXT: and a0, a1, a0
; CHECK64-IZHINXMIN-NEXT: slli a0, a0, 32
; CHECK64-IZHINXMIN-NEXT: srli a0, a0, 32
@@ -1961,7 +1961,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz
; CHECK64-IZDINXZHINXMIN-NEXT: feq.s a0, a0, a0
; CHECK64-IZDINXZHINXMIN-NEXT: seqz a0, a0
-; CHECK64-IZDINXZHINXMIN-NEXT: addiw a0, a0, -1
+; CHECK64-IZDINXZHINXMIN-NEXT: addi a0, a0, -1
; CHECK64-IZDINXZHINXMIN-NEXT: and a0, a1, a0
; CHECK64-IZDINXZHINXMIN-NEXT: slli a0, a0, 32
; CHECK64-IZDINXZHINXMIN-NEXT: srli a0, a0, 32
@@ -6551,13 +6551,13 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV64I-NEXT: blez a0, .LBB32_4
; RV64I-NEXT: # %bb.3: # %start
; RV64I-NEXT: lui s1, 8
-; RV64I-NEXT: addiw s1, s1, -1
+; RV64I-NEXT: addi s1, s1, -1
; RV64I-NEXT: .LBB32_4: # %start
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a0, a0, 48
@@ -7511,7 +7511,7 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind {
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a0, a0, 56
@@ -8128,7 +8128,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz
; RV64IZFH-NEXT: feq.h a1, fa0, fa0
; RV64IZFH-NEXT: seqz a1, a1
-; RV64IZFH-NEXT: addiw a1, a1, -1
+; RV64IZFH-NEXT: addi a1, a1, -1
; RV64IZFH-NEXT: and a0, a0, a1
; RV64IZFH-NEXT: slli a0, a0, 32
; RV64IZFH-NEXT: srli a0, a0, 32
@@ -8148,7 +8148,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
; RV64IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz
; RV64IDZFH-NEXT: feq.h a1, fa0, fa0
; RV64IDZFH-NEXT: seqz a1, a1
-; RV64IDZFH-NEXT: addiw a1, a1, -1
+; RV64IDZFH-NEXT: addi a1, a1, -1
; RV64IDZFH-NEXT: and a0, a0, a1
; RV64IDZFH-NEXT: slli a0, a0, 32
; RV64IDZFH-NEXT: srli a0, a0, 32
@@ -8168,7 +8168,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz
; RV64IZHINX-NEXT: feq.h a0, a0, a0
; RV64IZHINX-NEXT: seqz a0, a0
-; RV64IZHINX-NEXT: addiw a0, a0, -1
+; RV64IZHINX-NEXT: addi a0, a0, -1
; RV64IZHINX-NEXT: and a0, a1, a0
; RV64IZHINX-NEXT: slli a0, a0, 32
; RV64IZHINX-NEXT: srli a0, a0, 32
@@ -8188,7 +8188,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
; RV64IZDINXZHINX-NEXT: fcvt.wu.h a1, a0, rtz
; RV64IZDINXZHINX-NEXT: feq.h a0, a0, a0
; RV64IZDINXZHINX-NEXT: seqz a0, a0
-; RV64IZDINXZHINX-NEXT: addiw a0, a0, -1
+; RV64IZDINXZHINX-NEXT: addi a0, a0, -1
; RV64IZDINXZHINX-NEXT: and a0, a1, a0
; RV64IZDINXZHINX-NEXT: slli a0, a0, 32
; RV64IZDINXZHINX-NEXT: srli a0, a0, 32
@@ -8290,7 +8290,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
; RV64ID-LP64-NEXT: fcvt.wu.s a0, fa5, rtz
; RV64ID-LP64-NEXT: feq.s a1, fa5, fa5
; RV64ID-LP64-NEXT: seqz a1, a1
-; RV64ID-LP64-NEXT: addiw a1, a1, -1
+; RV64ID-LP64-NEXT: addi a1, a1, -1
; RV64ID-LP64-NEXT: and a0, a0, a1
; RV64ID-LP64-NEXT: slli a0, a0, 32
; RV64ID-LP64-NEXT: srli a0, a0, 32
@@ -8320,7 +8320,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
; RV64ID-NEXT: fcvt.wu.s a0, fa0, rtz
; RV64ID-NEXT: feq.s a1, fa0, fa0
; RV64ID-NEXT: seqz a1, a1
-; RV64ID-NEXT: addiw a1, a1, -1
+; RV64ID-NEXT: addi a1, a1, -1
; RV64ID-NEXT: and a0, a0, a1
; RV64ID-NEXT: slli a0, a0, 32
; RV64ID-NEXT: srli a0, a0, 32
@@ -8344,7 +8344,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
; CHECK64-IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz
; CHECK64-IZFHMIN-NEXT: feq.s a1, fa5, fa5
; CHECK64-IZFHMIN-NEXT: seqz a1, a1
-; CHECK64-IZFHMIN-NEXT: addiw a1, a1, -1
+; CHECK64-IZFHMIN-NEXT: addi a1, a1, -1
; CHECK64-IZFHMIN-NEXT: and a0, a0, a1
; CHECK64-IZFHMIN-NEXT: slli a0, a0, 32
; CHECK64-IZFHMIN-NEXT: srli a0, a0, 32
@@ -8366,7 +8366,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
; CHECK64-IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz
; CHECK64-IZHINXMIN-NEXT: feq.s a0, a0, a0
; CHECK64-IZHINXMIN-NEXT: seqz a0, a0
-; CHECK64-IZHINXMIN-NEXT: addiw a0, a0, -1
+; CHECK64-IZHINXMIN-NEXT: addi a0, a0, -1
; CHECK64-IZHINXMIN-NEXT: and a0, a1, a0
; CHECK64-IZHINXMIN-NEXT: slli a0, a0, 32
; CHECK64-IZHINXMIN-NEXT: srli a0, a0, 32
@@ -8388,7 +8388,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz
; CHECK64-IZDINXZHINXMIN-NEXT: feq.s a0, a0, a0
; CHECK64-IZDINXZHINXMIN-NEXT: seqz a0, a0
-; CHECK64-IZDINXZHINXMIN-NEXT: addiw a0, a0, -1
+; CHECK64-IZDINXZHINXMIN-NEXT: addi a0, a0, -1
; CHECK64-IZDINXZHINXMIN-NEXT: and a0, a1, a0
; CHECK64-IZDINXZHINXMIN-NEXT: slli a0, a0, 32
; CHECK64-IZDINXZHINXMIN-NEXT: srli a0, a0, 32
@@ -8518,13 +8518,13 @@ define signext i32 @fcvt_w_h_sat_sext(half %a) nounwind {
; RV64I-NEXT: call __gtsf2@plt
; RV64I-NEXT: blez a0, .LBB40_4
; RV64I-NEXT: # %bb.3: # %start
-; RV64I-NEXT: addiw s1, s3, -1
+; RV64I-NEXT: addi s1, s3, -1
; RV64I-NEXT: .LBB40_4: # %start
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2@plt
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, s1
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
index 70308857b2631a2..55c30046366d761 100644
--- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
+++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
@@ -259,21 +259,13 @@ define ptr @offset_sh3add() {
}
define dso_local void @read_modify_write() local_unnamed_addr nounwind {
-; RV32-LABEL: read_modify_write:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: lui a0, %hi(s+160)
-; RV32-NEXT: lw a1, %lo(s+160)(a0)
-; RV32-NEXT: addi a1, a1, 10
-; RV32-NEXT: sw a1, %lo(s+160)(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: read_modify_write:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: lui a0, %hi(s+160)
-; RV64-NEXT: lw a1, %lo(s+160)(a0)
-; RV64-NEXT: addiw a1, a1, 10
-; RV64-NEXT: sw a1, %lo(s+160)(a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: read_modify_write:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a0, %hi(s+160)
+; CHECK-NEXT: lw a1, %lo(s+160)(a0)
+; CHECK-NEXT: addi a1, a1, 10
+; CHECK-NEXT: sw a1, %lo(s+160)(a0)
+; CHECK-NEXT: ret
entry:
%x = load i32, ptr getelementptr inbounds (%struct.S, ptr @s, i32 0, i32 1), align 4
%y = add i32 %x, 10
@@ -375,21 +367,13 @@ define void @store_sh3add() {
}
define dso_local void @rmw_addi_addi() nounwind {
-; RV32-LABEL: rmw_addi_addi:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: lui a0, %hi(bar+3211)
-; RV32-NEXT: lbu a1, %lo(bar+3211)(a0)
-; RV32-NEXT: addi a1, a1, 10
-; RV32-NEXT: sb a1, %lo(bar+3211)(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: rmw_addi_addi:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: lui a0, %hi(bar+3211)
-; RV64-NEXT: lbu a1, %lo(bar+3211)(a0)
-; RV64-NEXT: addiw a1, a1, 10
-; RV64-NEXT: sb a1, %lo(bar+3211)(a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: rmw_addi_addi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a0, %hi(bar+3211)
+; CHECK-NEXT: lbu a1, %lo(bar+3211)(a0)
+; CHECK-NEXT: addi a1, a1, 10
+; CHECK-NEXT: sb a1, %lo(bar+3211)(a0)
+; CHECK-NEXT: ret
entry:
%0 = load i8, ptr getelementptr inbounds ([0 x i8], ptr @bar, i32 0, i64 3211)
%1 = add i8 %0, 10
diff --git a/llvm/test/CodeGen/RISCV/iabs.ll b/llvm/test/CodeGen/RISCV/iabs.ll
index 036a5d49d441c2d..afbbb9548c85d05 100644
--- a/llvm/test/CodeGen/RISCV/iabs.ll
+++ b/llvm/test/CodeGen/RISCV/iabs.ll
@@ -170,7 +170,7 @@ define i32 @abs32(i32 %x) {
;
; RV64I-LABEL: abs32:
; RV64I: # %bb.0:
-; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srai a1, a0, 31
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: ret
@@ -201,7 +201,7 @@ define i32 @select_abs32(i32 %x) {
;
; RV64I-LABEL: select_abs32:
; RV64I: # %bb.0:
-; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srai a1, a0, 31
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: ret
@@ -482,7 +482,7 @@ define i64 @zext_abs32(i32 %x) {
;
; RV64I-LABEL: zext_abs32:
; RV64I: # %bb.0:
-; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srai a1, a0, 31
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll
index 738318e4bd6774a..e191933b42338aa 100644
--- a/llvm/test/CodeGen/RISCV/imm.ll
+++ b/llvm/test/CodeGen/RISCV/imm.ll
@@ -588,35 +588,35 @@ define i64 @imm64_6() nounwind {
; RV64I-LABEL: imm64_6:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a0, 9321
-; RV64I-NEXT: addiw a0, a0, -1329
+; RV64I-NEXT: addi a0, a0, -1329
; RV64I-NEXT: slli a0, a0, 35
; RV64I-NEXT: ret
;
; RV64IZBA-LABEL: imm64_6:
; RV64IZBA: # %bb.0:
; RV64IZBA-NEXT: lui a0, 9321
-; RV64IZBA-NEXT: addiw a0, a0, -1329
+; RV64IZBA-NEXT: addi a0, a0, -1329
; RV64IZBA-NEXT: slli a0, a0, 35
; RV64IZBA-NEXT: ret
;
; RV64IZBB-LABEL: imm64_6:
; RV64IZBB: # %bb.0:
; RV64IZBB-NEXT: lui a0, 9321
-; RV64IZBB-NEXT: addiw a0, a0, -1329
+; RV64IZBB-NEXT: addi a0, a0, -1329
; RV64IZBB-NEXT: slli a0, a0, 35
; RV64IZBB-NEXT: ret
;
; RV64IZBS-LABEL: imm64_6:
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: lui a0, 9321
-; RV64IZBS-NEXT: addiw a0, a0, -1329
+; RV64IZBS-NEXT: addi a0, a0, -1329
; RV64IZBS-NEXT: slli a0, a0, 35
; RV64IZBS-NEXT: ret
;
; RV64IXTHEADBB-LABEL: imm64_6:
; RV64IXTHEADBB: # %bb.0:
; RV64IXTHEADBB-NEXT: lui a0, 9321
-; RV64IXTHEADBB-NEXT: addiw a0, a0, -1329
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1329
; RV64IXTHEADBB-NEXT: slli a0, a0, 35
; RV64IXTHEADBB-NEXT: ret
ret i64 1311768464867721216 ; 0x1234_5678_0000_0000
@@ -709,7 +709,7 @@ define i64 @imm64_8() nounwind {
; RV64IZBA-LABEL: imm64_8:
; RV64IZBA: # %bb.0:
; RV64IZBA-NEXT: lui a0, 596523
-; RV64IZBA-NEXT: addiw a0, a0, 965
+; RV64IZBA-NEXT: addi a0, a0, 965
; RV64IZBA-NEXT: slli.uw a0, a0, 13
; RV64IZBA-NEXT: addi a0, a0, -1347
; RV64IZBA-NEXT: slli a0, a0, 12
@@ -2298,7 +2298,7 @@ define i64 @imm_12900936431479() {
; RV64IZBA-LABEL: imm_12900936431479:
; RV64IZBA: # %bb.0:
; RV64IZBA-NEXT: lui a0, 768956
-; RV64IZBA-NEXT: addiw a0, a0, -1093
+; RV64IZBA-NEXT: addi a0, a0, -1093
; RV64IZBA-NEXT: slli.uw a0, a0, 12
; RV64IZBA-NEXT: addi a0, a0, 1911
; RV64IZBA-NEXT: ret
@@ -2353,7 +2353,7 @@ define i64 @imm_12900918536874() {
; RV64IZBA-LABEL: imm_12900918536874:
; RV64IZBA: # %bb.0:
; RV64IZBA-NEXT: lui a0, 768955
-; RV64IZBA-NEXT: addiw a0, a0, -1365
+; RV64IZBA-NEXT: addi a0, a0, -1365
; RV64IZBA-NEXT: slli.uw a0, a0, 12
; RV64IZBA-NEXT: addi a0, a0, -1366
; RV64IZBA-NEXT: ret
@@ -2408,7 +2408,7 @@ define i64 @imm_12900925247761() {
; RV64IZBA-LABEL: imm_12900925247761:
; RV64IZBA: # %bb.0:
; RV64IZBA-NEXT: lui a0, 768955
-; RV64IZBA-NEXT: addiw a0, a0, 273
+; RV64IZBA-NEXT: addi a0, a0, 273
; RV64IZBA-NEXT: slli.uw a0, a0, 12
; RV64IZBA-NEXT: addi a0, a0, 273
; RV64IZBA-NEXT: ret
@@ -3030,7 +3030,7 @@ define i64 @imm64_same_lo_hi_negative() nounwind {
; RV64IZBA-LABEL: imm64_same_lo_hi_negative:
; RV64IZBA: # %bb.0:
; RV64IZBA-NEXT: lui a0, 526344
-; RV64IZBA-NEXT: addiw a0, a0, 128
+; RV64IZBA-NEXT: addi a0, a0, 128
; RV64IZBA-NEXT: slli a1, a0, 32
; RV64IZBA-NEXT: add.uw a0, a0, a1
; RV64IZBA-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll b/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll
index bcca8064e97a2fb..9e7f2e9525d3b4b 100644
--- a/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll
+++ b/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll
@@ -264,7 +264,7 @@ define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
;
; RV64-LABEL: add_ultcmp_i16_i8:
; RV64: # %bb.0:
-; RV64-NEXT: addiw a0, a0, -128
+; RV64-NEXT: addi a0, a0, -128
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 56
; RV64-NEXT: sltiu a0, a0, 255
@@ -431,7 +431,7 @@ define i1 @add_ulecmp_i16_i8(i16 %x) nounwind {
;
; RV64-LABEL: add_ulecmp_i16_i8:
; RV64: # %bb.0:
-; RV64-NEXT: addiw a0, a0, -128
+; RV64-NEXT: addi a0, a0, -128
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 56
; RV64-NEXT: sltiu a0, a0, 255
@@ -457,7 +457,7 @@ define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
;
; RV64I-LABEL: add_ugecmp_i16_i8:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 128
+; RV64I-NEXT: addi a0, a0, 128
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sltiu a0, a0, 256
@@ -474,7 +474,7 @@ define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
;
; RV64ZBB-LABEL: add_ugecmp_i16_i8:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, 128
+; RV64ZBB-NEXT: addi a0, a0, 128
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 256
; RV64ZBB-NEXT: xori a0, a0, 1
@@ -645,7 +645,7 @@ define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind {
;
; RV64I-LABEL: add_ugtcmp_i16_i8:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 128
+; RV64I-NEXT: addi a0, a0, 128
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sltiu a0, a0, 256
@@ -662,7 +662,7 @@ define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind {
;
; RV64ZBB-LABEL: add_ugtcmp_i16_i8:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, 128
+; RV64ZBB-NEXT: addi a0, a0, 128
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 256
; RV64ZBB-NEXT: xori a0, a0, 1
@@ -751,7 +751,7 @@ define i1 @add_ugecmp_bad_i16_i8_cmp(i16 %x, i16 %y) nounwind {
; RV64ZBB-LABEL: add_ugecmp_bad_i16_i8_cmp:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: zext.h a1, a1
-; RV64ZBB-NEXT: addiw a0, a0, 128
+; RV64ZBB-NEXT: addi a0, a0, 128
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltu a0, a0, a1
; RV64ZBB-NEXT: xori a0, a0, 1
@@ -774,7 +774,7 @@ define i1 @add_ugecmp_bad_i8_i16(i16 %x) nounwind {
;
; RV64I-LABEL: add_ugecmp_bad_i8_i16:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 128
+; RV64I-NEXT: addi a0, a0, 128
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sltiu a0, a0, 128
@@ -791,7 +791,7 @@ define i1 @add_ugecmp_bad_i8_i16(i16 %x) nounwind {
;
; RV64ZBB-LABEL: add_ugecmp_bad_i8_i16:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, 128
+; RV64ZBB-NEXT: addi a0, a0, 128
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 128
; RV64ZBB-NEXT: xori a0, a0, 1
@@ -814,7 +814,7 @@ define i1 @add_ugecmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
;
; RV64I-LABEL: add_ugecmp_bad_i16_i8_c0notpoweroftwo:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 192
+; RV64I-NEXT: addi a0, a0, 192
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sltiu a0, a0, 256
@@ -831,7 +831,7 @@ define i1 @add_ugecmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
;
; RV64ZBB-LABEL: add_ugecmp_bad_i16_i8_c0notpoweroftwo:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, 192
+; RV64ZBB-NEXT: addi a0, a0, 192
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 256
; RV64ZBB-NEXT: xori a0, a0, 1
@@ -854,7 +854,7 @@ define i1 @add_ugecmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind {
;
; RV64I-LABEL: add_ugecmp_bad_i16_i8_c1notpoweroftwo:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 128
+; RV64I-NEXT: addi a0, a0, 128
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sltiu a0, a0, 768
@@ -871,7 +871,7 @@ define i1 @add_ugecmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind {
;
; RV64ZBB-LABEL: add_ugecmp_bad_i16_i8_c1notpoweroftwo:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, 128
+; RV64ZBB-NEXT: addi a0, a0, 128
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 768
; RV64ZBB-NEXT: xori a0, a0, 1
@@ -894,7 +894,7 @@ define i1 @add_ugecmp_bad_i16_i8_magic(i16 %x) nounwind {
;
; RV64I-LABEL: add_ugecmp_bad_i16_i8_magic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 64
+; RV64I-NEXT: addi a0, a0, 64
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sltiu a0, a0, 256
@@ -911,7 +911,7 @@ define i1 @add_ugecmp_bad_i16_i8_magic(i16 %x) nounwind {
;
; RV64ZBB-LABEL: add_ugecmp_bad_i16_i8_magic:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, 64
+; RV64ZBB-NEXT: addi a0, a0, 64
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 256
; RV64ZBB-NEXT: xori a0, a0, 1
@@ -934,7 +934,7 @@ define i1 @add_ugecmp_bad_i16_i4(i16 %x) nounwind {
;
; RV64I-LABEL: add_ugecmp_bad_i16_i4:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 8
+; RV64I-NEXT: addi a0, a0, 8
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sltiu a0, a0, 16
@@ -951,7 +951,7 @@ define i1 @add_ugecmp_bad_i16_i4(i16 %x) nounwind {
;
; RV64ZBB-LABEL: add_ugecmp_bad_i16_i4:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, 8
+; RV64ZBB-NEXT: addi a0, a0, 8
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 16
; RV64ZBB-NEXT: xori a0, a0, 1
@@ -974,7 +974,7 @@ define i1 @add_ugecmp_bad_i24_i8(i24 %x) nounwind {
;
; RV64-LABEL: add_ugecmp_bad_i24_i8:
; RV64: # %bb.0:
-; RV64-NEXT: addiw a0, a0, 128
+; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: slli a0, a0, 40
; RV64-NEXT: srli a0, a0, 40
; RV64-NEXT: sltiu a0, a0, 256
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-throw.ll b/llvm/test/CodeGen/RISCV/machine-outliner-throw.ll
index 03419932d030a96..21254b6302038ac 100644
--- a/llvm/test/CodeGen/RISCV/machine-outliner-throw.ll
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-throw.ll
@@ -13,7 +13,7 @@ define i32 @func1(i32 %x) #0 {
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: .cfi_offset s0, -16
; CHECK-NEXT: mul a0, a0, a0
-; CHECK-NEXT: addiw s0, a0, 1
+; CHECK-NEXT: addi s0, a0, 1
; CHECK-NEXT: li a0, 4
; CHECK-NEXT: call __cxa_allocate_exception@plt
; CHECK-NEXT: sw s0, 0(a0)
@@ -40,7 +40,7 @@ define i32 @func2(i32 %x) #0 {
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: .cfi_offset s0, -16
; CHECK-NEXT: mul a0, a0, a0
-; CHECK-NEXT: addiw s0, a0, 1
+; CHECK-NEXT: addi s0, a0, 1
; CHECK-NEXT: li a0, 4
; CHECK-NEXT: call __cxa_allocate_exception@plt
; CHECK-NEXT: sw s0, 0(a0)
diff --git a/llvm/test/CodeGen/RISCV/memcpy.ll b/llvm/test/CodeGen/RISCV/memcpy.ll
index 932bd2e13d62a18..d831a2a002a3ede 100644
--- a/llvm/test/CodeGen/RISCV/memcpy.ll
+++ b/llvm/test/CodeGen/RISCV/memcpy.ll
@@ -167,7 +167,7 @@ define void @t2(ptr nocapture %C) nounwind {
; RV64-FAST-NEXT: ld a2, %lo(.L.str2)(a1)
; RV64-FAST-NEXT: sd a2, 0(a0)
; RV64-FAST-NEXT: lui a2, 1156
-; RV64-FAST-NEXT: addiw a2, a2, 332
+; RV64-FAST-NEXT: addi a2, a2, 332
; RV64-FAST-NEXT: addi a1, a1, %lo(.L.str2)
; RV64-FAST-NEXT: ld a3, 24(a1)
; RV64-FAST-NEXT: ld a4, 16(a1)
@@ -332,10 +332,10 @@ define void @t5(ptr nocapture %C) nounwind {
; RV64-FAST-LABEL: t5:
; RV64-FAST: # %bb.0: # %entry
; RV64-FAST-NEXT: lui a1, 1349
-; RV64-FAST-NEXT: addiw a1, a1, 857
+; RV64-FAST-NEXT: addi a1, a1, 857
; RV64-FAST-NEXT: sw a1, 3(a0)
; RV64-FAST-NEXT: lui a1, 365861
-; RV64-FAST-NEXT: addiw a1, a1, -1980
+; RV64-FAST-NEXT: addi a1, a1, -1980
; RV64-FAST-NEXT: sw a1, 0(a0)
; RV64-FAST-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll
index 06be6cbd96410a2..9ebed2b430f14ce 100644
--- a/llvm/test/CodeGen/RISCV/neg-abs.ll
+++ b/llvm/test/CodeGen/RISCV/neg-abs.ll
@@ -27,14 +27,14 @@ define i32 @neg_abs32(i32 %x) {
;
; RV64I-LABEL: neg_abs32:
; RV64I: # %bb.0:
-; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srai a1, a0, 31
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: subw a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: neg_abs32:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: sraiw a1, a0, 31
+; RV64ZBB-NEXT: srai a1, a0, 31
; RV64ZBB-NEXT: xor a0, a0, a1
; RV64ZBB-NEXT: subw a0, a1, a0
; RV64ZBB-NEXT: ret
@@ -59,14 +59,14 @@ define i32 @select_neg_abs32(i32 %x) {
;
; RV64I-LABEL: select_neg_abs32:
; RV64I: # %bb.0:
-; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srai a1, a0, 31
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: subw a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: select_neg_abs32:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: sraiw a1, a0, 31
+; RV64ZBB-NEXT: srai a1, a0, 31
; RV64ZBB-NEXT: xor a0, a0, a1
; RV64ZBB-NEXT: subw a0, a1, a0
; RV64ZBB-NEXT: ret
@@ -177,7 +177,7 @@ define i32 @neg_abs32_multiuse(i32 %x, ptr %y) {
;
; RV64I-LABEL: neg_abs32_multiuse:
; RV64I: # %bb.0:
-; RV64I-NEXT: sraiw a2, a0, 31
+; RV64I-NEXT: srai a2, a0, 31
; RV64I-NEXT: xor a0, a0, a2
; RV64I-NEXT: subw a2, a0, a2
; RV64I-NEXT: negw a0, a2
diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
index 42f998e68bb6e71..7c3294fa81dcfe4 100644
--- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
@@ -551,7 +551,7 @@ define i1 @uaddo_i8_increment_noncanonical_1(i8 %x, ptr %p) {
;
; RV64-LABEL: uaddo_i8_increment_noncanonical_1:
; RV64: # %bb.0:
-; RV64-NEXT: addiw a2, a0, 1
+; RV64-NEXT: addi a2, a0, 1
; RV64-NEXT: andi a0, a2, 255
; RV64-NEXT: seqz a0, a0
; RV64-NEXT: sb a2, 0(a1)
@@ -594,7 +594,7 @@ define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, ptr %p) {
;
; RV64-LABEL: uaddo_i16_increment_noncanonical_3:
; RV64: # %bb.0:
-; RV64-NEXT: addiw a2, a0, 1
+; RV64-NEXT: addi a2, a0, 1
; RV64-NEXT: slli a0, a2, 48
; RV64-NEXT: srli a0, a0, 48
; RV64-NEXT: seqz a0, a0
@@ -672,7 +672,7 @@ define i1 @uaddo_i32_decrement_alt(i32 signext %x, ptr %p) {
; RV64-LABEL: uaddo_i32_decrement_alt:
; RV64: # %bb.0:
; RV64-NEXT: snez a2, a0
-; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: sw a0, 0(a1)
; RV64-NEXT: mv a0, a2
; RV64-NEXT: ret
@@ -915,7 +915,7 @@ define i1 @usubo_ult_constant_op1_i16(i16 %x, ptr %p) {
; RV64: # %bb.0:
; RV64-NEXT: slli a2, a0, 48
; RV64-NEXT: srli a2, a2, 48
-; RV64-NEXT: addiw a3, a0, -44
+; RV64-NEXT: addi a3, a0, -44
; RV64-NEXT: sltiu a0, a2, 44
; RV64-NEXT: sh a3, 0(a1)
; RV64-NEXT: ret
@@ -939,7 +939,7 @@ define i1 @usubo_ugt_constant_op1_i8(i8 %x, ptr %p) {
; RV64: # %bb.0:
; RV64-NEXT: andi a2, a0, 255
; RV64-NEXT: sltiu a2, a2, 45
-; RV64-NEXT: addiw a0, a0, -45
+; RV64-NEXT: addi a0, a0, -45
; RV64-NEXT: sb a0, 0(a1)
; RV64-NEXT: mv a0, a2
; RV64-NEXT: ret
@@ -962,7 +962,7 @@ define i1 @usubo_eq_constant1_op1_i32(i32 %x, ptr %p) {
; RV64-LABEL: usubo_eq_constant1_op1_i32:
; RV64: # %bb.0:
; RV64-NEXT: sext.w a2, a0
-; RV64-NEXT: addiw a3, a0, -1
+; RV64-NEXT: addi a3, a0, -1
; RV64-NEXT: seqz a0, a2
; RV64-NEXT: sw a3, 0(a1)
; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rem.ll b/llvm/test/CodeGen/RISCV/rem.ll
index 3d5fd3fd43317f2..0b239e6f10fdcac 100644
--- a/llvm/test/CodeGen/RISCV/rem.ll
+++ b/llvm/test/CodeGen/RISCV/rem.ll
@@ -123,8 +123,8 @@ define i32 @srem_pow2(i32 %a) nounwind {
;
; RV64I-LABEL: srem_pow2:
; RV64I: # %bb.0:
-; RV64I-NEXT: sraiw a1, a0, 31
-; RV64I-NEXT: srliw a1, a1, 29
+; RV64I-NEXT: srai a1, a0, 31
+; RV64I-NEXT: srli a1, a1, 29
; RV64I-NEXT: add a1, a0, a1
; RV64I-NEXT: andi a1, a1, -8
; RV64I-NEXT: subw a0, a0, a1
@@ -132,8 +132,8 @@ define i32 @srem_pow2(i32 %a) nounwind {
;
; RV64IM-LABEL: srem_pow2:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: sraiw a1, a0, 31
-; RV64IM-NEXT: srliw a1, a1, 29
+; RV64IM-NEXT: srai a1, a0, 31
+; RV64IM-NEXT: srli a1, a1, 29
; RV64IM-NEXT: add a1, a0, a1
; RV64IM-NEXT: andi a1, a1, -8
; RV64IM-NEXT: subw a0, a0, a1
@@ -165,8 +165,8 @@ define i32 @srem_pow2_2(i32 %a) nounwind {
;
; RV64I-LABEL: srem_pow2_2:
; RV64I: # %bb.0:
-; RV64I-NEXT: sraiw a1, a0, 31
-; RV64I-NEXT: srliw a1, a1, 16
+; RV64I-NEXT: srai a1, a0, 31
+; RV64I-NEXT: srli a1, a1, 16
; RV64I-NEXT: add a1, a0, a1
; RV64I-NEXT: lui a2, 1048560
; RV64I-NEXT: and a1, a1, a2
@@ -175,8 +175,8 @@ define i32 @srem_pow2_2(i32 %a) nounwind {
;
; RV64IM-LABEL: srem_pow2_2:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: sraiw a1, a0, 31
-; RV64IM-NEXT: srliw a1, a1, 16
+; RV64IM-NEXT: srai a1, a0, 31
+; RV64IM-NEXT: srli a1, a1, 16
; RV64IM-NEXT: add a1, a0, a1
; RV64IM-NEXT: lui a2, 1048560
; RV64IM-NEXT: and a1, a1, a2
@@ -578,7 +578,7 @@ define i16 @urem16(i16 %a, i16 %b) nounwind {
; RV64IM-LABEL: urem16:
; RV64IM: # %bb.0:
; RV64IM-NEXT: lui a2, 16
-; RV64IM-NEXT: addiw a2, a2, -1
+; RV64IM-NEXT: addi a2, a2, -1
; RV64IM-NEXT: and a1, a1, a2
; RV64IM-NEXT: and a0, a0, a2
; RV64IM-NEXT: remuw a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
index 61477589f8455e5..c343ef5b451de8d 100644
--- a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
@@ -12,7 +12,7 @@ define void @test1(ptr nocapture noundef %a, i32 noundef signext %n) {
; CHECK-NEXT: .LBB0_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: lw a2, 0(a0)
-; CHECK-NEXT: addiw a2, a2, 4
+; CHECK-NEXT: addi a2, a2, 4
; CHECK-NEXT: sw a2, 0(a0)
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: addi a0, a0, 4
@@ -62,9 +62,9 @@ define void @test2(ptr nocapture noundef %a, i32 noundef signext %n) {
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: lw a5, -4(a4)
; CHECK-NEXT: lw a6, 0(a4)
-; CHECK-NEXT: addiw a5, a5, 4
+; CHECK-NEXT: addi a5, a5, 4
; CHECK-NEXT: sw a5, -4(a4)
-; CHECK-NEXT: addiw a6, a6, 4
+; CHECK-NEXT: addi a6, a6, 4
; CHECK-NEXT: sw a6, 0(a4)
; CHECK-NEXT: addi a3, a3, 2
; CHECK-NEXT: addi a4, a4, 8
@@ -75,7 +75,7 @@ define void @test2(ptr nocapture noundef %a, i32 noundef signext %n) {
; CHECK-NEXT: slli a3, a3, 2
; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: lw a1, 0(a0)
-; CHECK-NEXT: addiw a1, a1, 4
+; CHECK-NEXT: addi a1, a1, 4
; CHECK-NEXT: sw a1, 0(a0)
; CHECK-NEXT: .LBB1_7: # %for.cond.cleanup
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll b/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll
index f957d25f5c490e7..eb9d58ea1e16fb7 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll
@@ -9,10 +9,10 @@ define i32 @foo(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: mul a0, a0, a0
-; CHECK-NEXT: addiw a0, a0, 1
+; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: mul a0, a0, a0
; CHECK-NEXT: add a0, a0, a2
-; CHECK-NEXT: addiw a0, a0, 1
+; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: sllw a0, a0, a1
; CHECK-NEXT: ret
%b = mul i32 %x, %x
@@ -169,7 +169,7 @@ define signext i32 @andi_srliw(i32 signext %0, ptr %1, i32 signext %2) {
; CHECK-LABEL: andi_srliw:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a3, a0, -8
-; CHECK-NEXT: srliw a4, a0, 3
+; CHECK-NEXT: srli a4, a0, 3
; CHECK-NEXT: addw a0, a3, a2
; CHECK-NEXT: sw a4, 0(a1)
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
index 1d495b11f614030..7392f5e1a20e3d6 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
@@ -1659,14 +1659,14 @@ define signext i32 @sext_addiw_zext(i32 zeroext %a) nounwind {
define zeroext i32 @zext_addiw_aext(i32 %a) nounwind {
; RV64I-LABEL: zext_addiw_aext:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 7
+; RV64I-NEXT: addi a0, a0, 7
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: zext_addiw_aext:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: addiw a0, a0, 7
+; RV64ZBA-NEXT: addi a0, a0, 7
; RV64ZBA-NEXT: zext.w a0, a0
; RV64ZBA-NEXT: ret
%1 = add i32 %a, 7
@@ -1676,14 +1676,14 @@ define zeroext i32 @zext_addiw_aext(i32 %a) nounwind {
define zeroext i32 @zext_addiw_sext(i32 signext %a) nounwind {
; RV64I-LABEL: zext_addiw_sext:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 8
+; RV64I-NEXT: addi a0, a0, 8
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: zext_addiw_sext:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: addiw a0, a0, 8
+; RV64ZBA-NEXT: addi a0, a0, 8
; RV64ZBA-NEXT: zext.w a0, a0
; RV64ZBA-NEXT: ret
%1 = add i32 %a, 8
@@ -1693,14 +1693,14 @@ define zeroext i32 @zext_addiw_sext(i32 signext %a) nounwind {
define zeroext i32 @zext_addiw_zext(i32 zeroext %a) nounwind {
; RV64I-LABEL: zext_addiw_zext:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 9
+; RV64I-NEXT: addi a0, a0, 9
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: zext_addiw_zext:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: addiw a0, a0, 9
+; RV64ZBA-NEXT: addi a0, a0, 9
; RV64ZBA-NEXT: zext.w a0, a0
; RV64ZBA-NEXT: ret
%1 = add i32 %a, 9
@@ -1943,7 +1943,7 @@ define zeroext i32 @zext_sraiw_aext(i32 %a) nounwind {
;
; RV64ZBA-LABEL: zext_sraiw_aext:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: sraiw a0, a0, 7
+; RV64ZBA-NEXT: srai a0, a0, 7
; RV64ZBA-NEXT: zext.w a0, a0
; RV64ZBA-NEXT: ret
%1 = ashr i32 %a, 7
@@ -1970,7 +1970,7 @@ define zeroext i32 @zext_sraiw_zext(i32 zeroext %a) nounwind {
;
; RV64ZBA-LABEL: zext_sraiw_zext:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: sraiw a0, a0, 9
+; RV64ZBA-NEXT: srai a0, a0, 9
; RV64ZBA-NEXT: zext.w a0, a0
; RV64ZBA-NEXT: ret
%1 = ashr i32 %a, 9
diff --git a/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll b/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll
index 4a9d8b08a4b2f74..f38aa71fb158d00 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll
@@ -113,7 +113,7 @@ define signext i32 @test9(ptr %0, i64 %1) {
; RV64I-LABEL: test9:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a2, 1
-; RV64I-NEXT: addiw a2, a2, 1
+; RV64I-NEXT: addi a2, a2, 1
; RV64I-NEXT: addw a1, a1, a2
; RV64I-NEXT: slli a1, a1, 2
; RV64I-NEXT: add a0, a0, a1
@@ -133,7 +133,7 @@ define signext i32 @test10(ptr %0, i64 %1) {
; RV64I-LABEL: test10:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a2, 30141
-; RV64I-NEXT: addiw a2, a2, -747
+; RV64I-NEXT: addi a2, a2, -747
; RV64I-NEXT: subw a2, a2, a1
; RV64I-NEXT: slli a2, a2, 2
; RV64I-NEXT: add a0, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll b/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll
index eea04ae03f8d652..ab1691543c78af1 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll
@@ -8,10 +8,10 @@ define signext i32 @addw(i32 signext %s, i32 signext %n, i32 signext %k) nounwin
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: not a2, a0
; CHECK-NEXT: add a2, a2, a1
-; CHECK-NEXT: addiw a3, a0, 1
+; CHECK-NEXT: addi a3, a0, 1
; CHECK-NEXT: mul a3, a2, a3
; CHECK-NEXT: subw a1, a1, a0
-; CHECK-NEXT: addiw a1, a1, -2
+; CHECK-NEXT: addi a1, a1, -2
; CHECK-NEXT: slli a1, a1, 32
; CHECK-NEXT: slli a2, a2, 32
; CHECK-NEXT: mulhu a1, a2, a1
@@ -56,7 +56,7 @@ define signext i32 @subw(i32 signext %s, i32 signext %n, i32 signext %k) nounwin
; CHECK-NEXT: add a3, a2, a1
; CHECK-NEXT: mul a2, a3, a2
; CHECK-NEXT: subw a1, a1, a0
-; CHECK-NEXT: addiw a1, a1, -2
+; CHECK-NEXT: addi a1, a1, -2
; CHECK-NEXT: slli a1, a1, 32
; CHECK-NEXT: slli a3, a3, 32
; CHECK-NEXT: mulhu a1, a3, a1
diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
index 90a10d02636bd03..cdcadee3b82bf08 100644
--- a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
@@ -173,7 +173,7 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
;
; RV64XTHEADBB-LABEL: log2_ceil_i32:
; RV64XTHEADBB: # %bb.0:
-; RV64XTHEADBB-NEXT: addiw a0, a0, -1
+; RV64XTHEADBB-NEXT: addi a0, a0, -1
; RV64XTHEADBB-NEXT: not a0, a0
; RV64XTHEADBB-NEXT: slli a0, a0, 32
; RV64XTHEADBB-NEXT: th.ff0 a0, a0
@@ -300,7 +300,7 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
;
; RV64XTHEADBB-LABEL: ctlz_lshr_i32:
; RV64XTHEADBB: # %bb.0:
-; RV64XTHEADBB-NEXT: srliw a0, a0, 1
+; RV64XTHEADBB-NEXT: srli a0, a0, 1
; RV64XTHEADBB-NEXT: not a0, a0
; RV64XTHEADBB-NEXT: slli a0, a0, 32
; RV64XTHEADBB-NEXT: th.ff0 a0, a0
@@ -774,9 +774,9 @@ define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: srli a2, a0, 8
; RV64I-NEXT: lui a3, 16
-; RV64I-NEXT: addiw a3, a3, -256
+; RV64I-NEXT: addi a3, a3, -256
; RV64I-NEXT: and a2, a2, a3
-; RV64I-NEXT: srliw a4, a0, 24
+; RV64I-NEXT: srli a4, a0, 24
; RV64I-NEXT: or a2, a2, a4
; RV64I-NEXT: and a3, a0, a3
; RV64I-NEXT: slli a3, a3, 8
@@ -817,7 +817,7 @@ define i64 @bswap_i64(i64 %a) {
; RV64I-NEXT: or a1, a3, a1
; RV64I-NEXT: and a4, a0, a4
; RV64I-NEXT: slli a4, a4, 24
-; RV64I-NEXT: srliw a3, a0, 24
+; RV64I-NEXT: srli a3, a0, 24
; RV64I-NEXT: slli a3, a3, 32
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: and a2, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll
index 07a4c093f06e68d..a7af8ab348e99e3 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll
@@ -30,7 +30,7 @@ define signext i32 @orcb32_knownbits(i32 signext %a) nounwind {
; RV64ZBB-NEXT: lui a1, 1044480
; RV64ZBB-NEXT: and a0, a0, a1
; RV64ZBB-NEXT: lui a1, 2048
-; RV64ZBB-NEXT: addiw a1, a1, 1
+; RV64ZBB-NEXT: addi a1, a1, 1
; RV64ZBB-NEXT: or a0, a0, a1
; RV64ZBB-NEXT: orc.b a0, a0
; RV64ZBB-NEXT: sext.w a0, a0
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
index d9afb7c00ce58f2..e12b7cc3dc01d35 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
@@ -278,7 +278,7 @@ define signext i32 @rori_i32_fshl(i32 signext %a) nounwind {
define void @rori_i32_fshl_nosext(i32 signext %a, ptr %x) nounwind {
; RV64I-LABEL: rori_i32_fshl_nosext:
; RV64I: # %bb.0:
-; RV64I-NEXT: srliw a2, a0, 1
+; RV64I-NEXT: srli a2, a0, 1
; RV64I-NEXT: slli a0, a0, 31
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: sw a0, 0(a1)
@@ -315,7 +315,7 @@ define void @rori_i32_fshr_nosext(i32 signext %a, ptr %x) nounwind {
; RV64I-LABEL: rori_i32_fshr_nosext:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a2, a0, 1
-; RV64I-NEXT: srliw a0, a0, 31
+; RV64I-NEXT: srli a0, a0, 31
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: sw a0, 0(a1)
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 18f1574b5352679..ddbe4a2fc38e3f8 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -169,7 +169,7 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
;
; RV64ZBB-LABEL: log2_ceil_i32:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, -1
+; RV64ZBB-NEXT: addi a0, a0, -1
; RV64ZBB-NEXT: clzw a0, a0
; RV64ZBB-NEXT: li a1, 32
; RV64ZBB-NEXT: sub a0, a1, a0
@@ -292,7 +292,7 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
;
; RV64ZBB-LABEL: ctlz_lshr_i32:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: srliw a0, a0, 1
+; RV64ZBB-NEXT: srli a0, a0, 1
; RV64ZBB-NEXT: clzw a0, a0
; RV64ZBB-NEXT: ret
%1 = lshr i32 %a, 1
@@ -768,11 +768,11 @@ define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind {
define <2 x i1> @ctpop_v2i32_ult_two(<2 x i32> %a) nounwind {
; RV64I-LABEL: ctpop_v2i32_ult_two:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a2, a0, -1
+; RV64I-NEXT: addi a2, a0, -1
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: seqz a0, a0
-; RV64I-NEXT: addiw a2, a1, -1
+; RV64I-NEXT: addi a2, a1, -1
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: sext.w a1, a1
; RV64I-NEXT: seqz a1, a1
@@ -793,11 +793,11 @@ define <2 x i1> @ctpop_v2i32_ult_two(<2 x i32> %a) nounwind {
define <2 x i1> @ctpop_v2i32_ugt_one(<2 x i32> %a) nounwind {
; RV64I-LABEL: ctpop_v2i32_ugt_one:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a2, a0, -1
+; RV64I-NEXT: addi a2, a0, -1
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addiw a2, a1, -1
+; RV64I-NEXT: addi a2, a1, -1
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: sext.w a1, a1
; RV64I-NEXT: snez a1, a1
@@ -822,13 +822,13 @@ define <2 x i1> @ctpop_v2i32_eq_one(<2 x i32> %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: sext.w a2, a1
; RV64I-NEXT: sext.w a3, a0
-; RV64I-NEXT: addiw a4, a0, -1
+; RV64I-NEXT: addi a4, a0, -1
; RV64I-NEXT: and a0, a0, a4
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: snez a3, a3
; RV64I-NEXT: and a0, a3, a0
-; RV64I-NEXT: addiw a3, a1, -1
+; RV64I-NEXT: addi a3, a1, -1
; RV64I-NEXT: and a1, a1, a3
; RV64I-NEXT: sext.w a1, a1
; RV64I-NEXT: seqz a1, a1
@@ -855,13 +855,13 @@ define <2 x i1> @ctpop_v2i32_ne_one(<2 x i32> %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: sext.w a2, a1
; RV64I-NEXT: sext.w a3, a0
-; RV64I-NEXT: addiw a4, a0, -1
+; RV64I-NEXT: addi a4, a0, -1
; RV64I-NEXT: and a0, a0, a4
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: seqz a3, a3
; RV64I-NEXT: or a0, a3, a0
-; RV64I-NEXT: addiw a3, a1, -1
+; RV64I-NEXT: addi a3, a1, -1
; RV64I-NEXT: and a1, a1, a3
; RV64I-NEXT: sext.w a1, a1
; RV64I-NEXT: snez a1, a1
@@ -1404,7 +1404,7 @@ declare i32 @llvm.abs.i32(i32, i1 immarg)
define i32 @abs_i32(i32 %x) {
; RV64I-LABEL: abs_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srai a1, a0, 31
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: ret
@@ -1518,9 +1518,9 @@ define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: srli a2, a0, 8
; RV64I-NEXT: lui a3, 16
-; RV64I-NEXT: addiw a3, a3, -256
+; RV64I-NEXT: addi a3, a3, -256
; RV64I-NEXT: and a2, a2, a3
-; RV64I-NEXT: srliw a4, a0, 24
+; RV64I-NEXT: srli a4, a0, 24
; RV64I-NEXT: or a2, a2, a4
; RV64I-NEXT: and a3, a0, a3
; RV64I-NEXT: slli a3, a3, 8
@@ -1562,7 +1562,7 @@ define i64 @bswap_i64(i64 %a) {
; RV64I-NEXT: or a1, a3, a1
; RV64I-NEXT: and a4, a0, a4
; RV64I-NEXT: slli a4, a4, 24
-; RV64I-NEXT: srliw a3, a0, 24
+; RV64I-NEXT: srli a3, a0, 24
; RV64I-NEXT: slli a3, a3, 32
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: and a2, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/rv64zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbkb.ll
index fa96c576017ba5a..dd6248233975a7e 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbkb.ll
@@ -303,7 +303,7 @@ define i64 @pack_i64_imm() {
; RV64ZBKB-LABEL: pack_i64_imm:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: lui a0, 65793
-; RV64ZBKB-NEXT: addiw a0, a0, 16
+; RV64ZBKB-NEXT: addi a0, a0, 16
; RV64ZBKB-NEXT: pack a0, a0, a0
; RV64ZBKB-NEXT: ret
ret i64 1157442765409226768 ; 0x0101010101010101
diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll
index aa3d7b3fa8a7c87..3514fa66f5886d3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll
@@ -229,63 +229,34 @@ define <vscale x 64 x i8> @bitreverse_nxv64i8(<vscale x 64 x i8> %va) {
declare <vscale x 64 x i8> @llvm.bitreverse.nxv64i8(<vscale x 64 x i8>)
define <vscale x 1 x i16> @bitreverse_nxv1i16(<vscale x 1 x i16> %va) {
-; RV32-LABEL: bitreverse_nxv1i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vsll.vi v8, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: bitreverse_nxv1i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vsll.vi v8, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: bitreverse_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: vsll.vi v8, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv1i16:
; CHECK-ZVBB: # %bb.0:
@@ -298,63 +269,34 @@ define <vscale x 1 x i16> @bitreverse_nxv1i16(<vscale x 1 x i16> %va) {
declare <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16>)
define <vscale x 2 x i16> @bitreverse_nxv2i16(<vscale x 2 x i16> %va) {
-; RV32-LABEL: bitreverse_nxv2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vsll.vi v8, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: bitreverse_nxv2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vsll.vi v8, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: bitreverse_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: vsll.vi v8, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv2i16:
; CHECK-ZVBB: # %bb.0:
@@ -367,63 +309,34 @@ define <vscale x 2 x i16> @bitreverse_nxv2i16(<vscale x 2 x i16> %va) {
declare <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16>)
define <vscale x 4 x i16> @bitreverse_nxv4i16(<vscale x 4 x i16> %va) {
-; RV32-LABEL: bitreverse_nxv4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vsll.vi v8, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: bitreverse_nxv4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vsll.vi v8, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: bitreverse_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: vsll.vi v8, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv4i16:
; CHECK-ZVBB: # %bb.0:
@@ -436,63 +349,34 @@ define <vscale x 4 x i16> @bitreverse_nxv4i16(<vscale x 4 x i16> %va) {
declare <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16>)
define <vscale x 8 x i16> @bitreverse_nxv8i16(<vscale x 8 x i16> %va) {
-; RV32-LABEL: bitreverse_nxv8i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: vsll.vi v8, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 2
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: bitreverse_nxv8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: vsll.vi v8, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 2
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v10, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: bitreverse_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 8
+; CHECK-NEXT: vsll.vi v8, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 2
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v10, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv8i16:
; CHECK-ZVBB: # %bb.0:
@@ -505,63 +389,34 @@ define <vscale x 8 x i16> @bitreverse_nxv8i16(<vscale x 8 x i16> %va) {
declare <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16>)
define <vscale x 16 x i16> @bitreverse_nxv16i16(<vscale x 16 x i16> %va) {
-; RV32-LABEL: bitreverse_nxv16i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: vsll.vi v8, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 2
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v12, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: bitreverse_nxv16i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 8
-; RV64-NEXT: vsll.vi v8, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 2
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v12, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: bitreverse_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 8
+; CHECK-NEXT: vsll.vi v8, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vsrl.vi v12, v8, 4
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 2
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v12, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv16i16:
; CHECK-ZVBB: # %bb.0:
@@ -574,63 +429,34 @@ define <vscale x 16 x i16> @bitreverse_nxv16i16(<vscale x 16 x i16> %va) {
declare <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16>)
define <vscale x 32 x i16> @bitreverse_nxv32i16(<vscale x 32 x i16> %va) {
-; RV32-LABEL: bitreverse_nxv32i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vsll.vi v8, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v16, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: bitreverse_nxv32i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: vsll.vi v8, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v16, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: bitreverse_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 8
+; CHECK-NEXT: vsll.vi v8, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 2
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v16, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv32i16:
; CHECK-ZVBB: # %bb.0:
@@ -643,79 +469,42 @@ define <vscale x 32 x i16> @bitreverse_nxv32i16(<vscale x 32 x i16> %va) {
declare <vscale x 32 x i16> @llvm.bitreverse.nxv32i16(<vscale x 32 x i16>)
define <vscale x 1 x i32> @bitreverse_nxv1i32(<vscale x 1 x i32> %va) {
-; RV32-LABEL: bitreverse_nxv1i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsrl.vi v10, v8, 24
-; RV32-NEXT: vor.vv v9, v9, v10
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsll.vi v10, v10, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: bitreverse_nxv1i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsrl.vi v10, v8, 24
-; RV64-NEXT: vor.vv v9, v9, v10
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsll.vi v10, v10, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: bitreverse_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsrl.vi v10, v8, 24
+; CHECK-NEXT: vor.vv v9, v9, v10
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsll.vi v10, v10, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv1i32:
; CHECK-ZVBB: # %bb.0:
@@ -728,79 +517,42 @@ define <vscale x 1 x i32> @bitreverse_nxv1i32(<vscale x 1 x i32> %va) {
declare <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32>)
define <vscale x 2 x i32> @bitreverse_nxv2i32(<vscale x 2 x i32> %va) {
-; RV32-LABEL: bitreverse_nxv2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsrl.vi v10, v8, 24
-; RV32-NEXT: vor.vv v9, v9, v10
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsll.vi v10, v10, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: bitreverse_nxv2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsrl.vi v10, v8, 24
-; RV64-NEXT: vor.vv v9, v9, v10
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsll.vi v10, v10, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: bitreverse_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsrl.vi v10, v8, 24
+; CHECK-NEXT: vor.vv v9, v9, v10
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsll.vi v10, v10, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv2i32:
; CHECK-ZVBB: # %bb.0:
@@ -813,79 +565,42 @@ define <vscale x 2 x i32> @bitreverse_nxv2i32(<vscale x 2 x i32> %va) {
declare <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32>)
define <vscale x 4 x i32> @bitreverse_nxv4i32(<vscale x 4 x i32> %va) {
-; RV32-LABEL: bitreverse_nxv4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsrl.vi v12, v8, 24
-; RV32-NEXT: vor.vv v10, v10, v12
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsll.vi v12, v12, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 2
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: bitreverse_nxv4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsrl.vi v12, v8, 24
-; RV64-NEXT: vor.vv v10, v10, v12
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsll.vi v12, v12, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 2
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v10, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: bitreverse_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsrl.vi v12, v8, 24
+; CHECK-NEXT: vor.vv v10, v10, v12
+; CHECK-NEXT: vand.vx v12, v8, a0
+; CHECK-NEXT: vsll.vi v12, v12, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 2
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v10, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv4i32:
; CHECK-ZVBB: # %bb.0:
@@ -898,79 +613,42 @@ define <vscale x 4 x i32> @bitreverse_nxv4i32(<vscale x 4 x i32> %va) {
declare <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32>)
define <vscale x 8 x i32> @bitreverse_nxv8i32(<vscale x 8 x i32> %va) {
-; RV32-LABEL: bitreverse_nxv8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsrl.vi v16, v8, 24
-; RV32-NEXT: vor.vv v12, v12, v16
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsll.vi v16, v16, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 2
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v12, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: bitreverse_nxv8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsrl.vi v16, v8, 24
-; RV64-NEXT: vor.vv v12, v12, v16
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsll.vi v16, v16, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 2
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v12, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: bitreverse_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vsrl.vi v16, v8, 24
+; CHECK-NEXT: vor.vv v12, v12, v16
+; CHECK-NEXT: vand.vx v16, v8, a0
+; CHECK-NEXT: vsll.vi v16, v16, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vsrl.vi v12, v8, 4
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 2
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v12, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv8i32:
; CHECK-ZVBB: # %bb.0:
@@ -983,79 +661,42 @@ define <vscale x 8 x i32> @bitreverse_nxv8i32(<vscale x 8 x i32> %va) {
declare <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32>)
define <vscale x 16 x i32> @bitreverse_nxv16i32(<vscale x 16 x i32> %va) {
-; RV32-LABEL: bitreverse_nxv16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsrl.vi v24, v8, 24
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vand.vx v24, v8, a0
-; RV32-NEXT: vsll.vi v24, v24, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v16, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: bitreverse_nxv16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsrl.vi v24, v8, 24
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vand.vx v24, v8, a0
-; RV64-NEXT: vsll.vi v24, v24, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v16, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: bitreverse_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vsrl.vi v24, v8, 24
+; CHECK-NEXT: vor.vv v16, v16, v24
+; CHECK-NEXT: vand.vx v24, v8, a0
+; CHECK-NEXT: vsll.vi v24, v24, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 2
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v16, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv16i32:
; CHECK-ZVBB: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
index ca2d7639f528e3f..7698f860589aaf1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
@@ -487,63 +487,34 @@ define <vscale x 64 x i8> @vp_bitreverse_nxv64i8_unmasked(<vscale x 64 x i8> %va
declare <vscale x 1 x i16> @llvm.vp.bitreverse.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
define <vscale x 1 x i16> @vp_bitreverse_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv1i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv1i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i16:
; CHECK-ZVBB: # %bb.0:
@@ -555,63 +526,34 @@ define <vscale x 1 x i16> @vp_bitreverse_nxv1i16(<vscale x 1 x i16> %va, <vscale
}
define <vscale x 1 x i16> @vp_bitreverse_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv1i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vsll.vi v8, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv1i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vsll.vi v8, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: vsll.vi v8, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -627,63 +569,34 @@ define <vscale x 1 x i16> @vp_bitreverse_nxv1i16_unmasked(<vscale x 1 x i16> %va
declare <vscale x 2 x i16> @llvm.vp.bitreverse.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
define <vscale x 2 x i16> @vp_bitreverse_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i16:
; CHECK-ZVBB: # %bb.0:
@@ -695,63 +608,34 @@ define <vscale x 2 x i16> @vp_bitreverse_nxv2i16(<vscale x 2 x i16> %va, <vscale
}
define <vscale x 2 x i16> @vp_bitreverse_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv2i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vsll.vi v8, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv2i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vsll.vi v8, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: vsll.vi v8, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -767,63 +651,34 @@ define <vscale x 2 x i16> @vp_bitreverse_nxv2i16_unmasked(<vscale x 2 x i16> %va
declare <vscale x 4 x i16> @llvm.vp.bitreverse.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
define <vscale x 4 x i16> @vp_bitreverse_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i16:
; CHECK-ZVBB: # %bb.0:
@@ -835,63 +690,34 @@ define <vscale x 4 x i16> @vp_bitreverse_nxv4i16(<vscale x 4 x i16> %va, <vscale
}
define <vscale x 4 x i16> @vp_bitreverse_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv4i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vsll.vi v8, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv4i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vsll.vi v8, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: vsll.vi v8, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -907,63 +733,34 @@ define <vscale x 4 x i16> @vp_bitreverse_nxv4i16_unmasked(<vscale x 4 x i16> %va
declare <vscale x 8 x i16> @llvm.vp.bitreverse.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
define <vscale x 8 x i16> @vp_bitreverse_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv8i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v10, v8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v10, v8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i16:
; CHECK-ZVBB: # %bb.0:
@@ -975,63 +772,34 @@ define <vscale x 8 x i16> @vp_bitreverse_nxv8i16(<vscale x 8 x i16> %va, <vscale
}
define <vscale x 8 x i16> @vp_bitreverse_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv8i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: vsll.vi v8, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 2
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv8i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: vsll.vi v8, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 2
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v10, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 8
+; CHECK-NEXT: vsll.vi v8, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 2
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v10, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1047,63 +815,34 @@ define <vscale x 8 x i16> @vp_bitreverse_nxv8i16_unmasked(<vscale x 8 x i16> %va
declare <vscale x 16 x i16> @llvm.vp.bitreverse.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
define <vscale x 16 x i16> @vp_bitreverse_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv16i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v12, v8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv16i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v12, v8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i16:
; CHECK-ZVBB: # %bb.0:
@@ -1115,63 +854,34 @@ define <vscale x 16 x i16> @vp_bitreverse_nxv16i16(<vscale x 16 x i16> %va, <vsc
}
define <vscale x 16 x i16> @vp_bitreverse_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv16i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: vsll.vi v8, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 2
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v12, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv16i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 8
-; RV64-NEXT: vsll.vi v8, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 2
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v12, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 8
+; CHECK-NEXT: vsll.vi v8, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vsrl.vi v12, v8, 4
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 2
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v12, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1187,63 +897,34 @@ define <vscale x 16 x i16> @vp_bitreverse_nxv16i16_unmasked(<vscale x 16 x i16>
declare <vscale x 32 x i16> @llvm.vp.bitreverse.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i1>, i32)
define <vscale x 32 x i16> @vp_bitreverse_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv32i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v16, v8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv32i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i16:
; CHECK-ZVBB: # %bb.0:
@@ -1255,63 +936,34 @@ define <vscale x 32 x i16> @vp_bitreverse_nxv32i16(<vscale x 32 x i16> %va, <vsc
}
define <vscale x 32 x i16> @vp_bitreverse_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv32i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vsll.vi v8, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v16, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv32i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: vsll.vi v8, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v16, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 8
+; CHECK-NEXT: vsll.vi v8, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 2
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v16, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1327,79 +979,42 @@ define <vscale x 32 x i16> @vp_bitreverse_nxv32i16_unmasked(<vscale x 32 x i16>
declare <vscale x 1 x i32> @llvm.vp.bitreverse.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
define <vscale x 1 x i32> @vp_bitreverse_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv1i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t
-; RV32-NEXT: vor.vv v9, v9, v10, v0.t
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v10, v10, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv1i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 24, v0.t
-; RV64-NEXT: vor.vv v9, v9, v10, v0.t
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v10, v10, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i32:
; CHECK-ZVBB: # %bb.0:
@@ -1411,79 +1026,42 @@ define <vscale x 1 x i32> @vp_bitreverse_nxv1i32(<vscale x 1 x i32> %va, <vscale
}
define <vscale x 1 x i32> @vp_bitreverse_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv1i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsrl.vi v10, v8, 24
-; RV32-NEXT: vor.vv v9, v9, v10
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsll.vi v10, v10, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv1i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsrl.vi v10, v8, 24
-; RV64-NEXT: vor.vv v9, v9, v10
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsll.vi v10, v10, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsrl.vi v10, v8, 24
+; CHECK-NEXT: vor.vv v9, v9, v10
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsll.vi v10, v10, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1499,79 +1077,42 @@ define <vscale x 1 x i32> @vp_bitreverse_nxv1i32_unmasked(<vscale x 1 x i32> %va
declare <vscale x 2 x i32> @llvm.vp.bitreverse.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
define <vscale x 2 x i32> @vp_bitreverse_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t
-; RV32-NEXT: vor.vv v9, v9, v10, v0.t
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v10, v10, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 24, v0.t
-; RV64-NEXT: vor.vv v9, v9, v10, v0.t
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v10, v10, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i32:
; CHECK-ZVBB: # %bb.0:
@@ -1583,79 +1124,42 @@ define <vscale x 2 x i32> @vp_bitreverse_nxv2i32(<vscale x 2 x i32> %va, <vscale
}
define <vscale x 2 x i32> @vp_bitreverse_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv2i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsrl.vi v10, v8, 24
-; RV32-NEXT: vor.vv v9, v9, v10
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsll.vi v10, v10, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv2i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsrl.vi v10, v8, 24
-; RV64-NEXT: vor.vv v9, v9, v10
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsll.vi v10, v10, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsrl.vi v10, v8, 24
+; CHECK-NEXT: vor.vv v9, v9, v10
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsll.vi v10, v10, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1671,79 +1175,42 @@ define <vscale x 2 x i32> @vp_bitreverse_nxv2i32_unmasked(<vscale x 2 x i32> %va
declare <vscale x 4 x i32> @llvm.vp.bitreverse.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
define <vscale x 4 x i32> @vp_bitreverse_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t
-; RV32-NEXT: vor.vv v10, v10, v12, v0.t
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v12, v12, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v10, v8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 24, v0.t
-; RV64-NEXT: vor.vv v10, v10, v12, v0.t
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v12, v12, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v10, v8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v10, v10, v12, v0.t
+; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v12, v12, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i32:
; CHECK-ZVBB: # %bb.0:
@@ -1755,79 +1222,42 @@ define <vscale x 4 x i32> @vp_bitreverse_nxv4i32(<vscale x 4 x i32> %va, <vscale
}
define <vscale x 4 x i32> @vp_bitreverse_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv4i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsrl.vi v12, v8, 24
-; RV32-NEXT: vor.vv v10, v10, v12
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsll.vi v12, v12, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 2
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv4i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsrl.vi v12, v8, 24
-; RV64-NEXT: vor.vv v10, v10, v12
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsll.vi v12, v12, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 2
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v10, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsrl.vi v12, v8, 24
+; CHECK-NEXT: vor.vv v10, v10, v12
+; CHECK-NEXT: vand.vx v12, v8, a0
+; CHECK-NEXT: vsll.vi v12, v12, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 2
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v10, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1843,79 +1273,42 @@ define <vscale x 4 x i32> @vp_bitreverse_nxv4i32_unmasked(<vscale x 4 x i32> %va
declare <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
define <vscale x 8 x i32> @vp_bitreverse_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t
-; RV32-NEXT: vor.vv v12, v12, v16, v0.t
-; RV32-NEXT: vand.vx v16, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v12, v8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t
-; RV64-NEXT: vor.vv v12, v12, v16, v0.t
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v16, v16, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v12, v8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v12, v12, v16, v0.t
+; CHECK-NEXT: vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i32:
; CHECK-ZVBB: # %bb.0:
@@ -1927,167 +1320,93 @@ define <vscale x 8 x i32> @vp_bitreverse_nxv8i32(<vscale x 8 x i32> %va, <vscale
}
define <vscale x 8 x i32> @vp_bitreverse_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv8i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsrl.vi v16, v8, 24
-; RV32-NEXT: vor.vv v12, v12, v16
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsll.vi v16, v16, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 2
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v12, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv8i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsrl.vi v16, v8, 24
-; RV64-NEXT: vor.vv v12, v12, v16
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsll.vi v16, v16, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 2
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v12, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vsrl.vi v16, v8, 24
+; CHECK-NEXT: vor.vv v12, v12, v16
+; CHECK-NEXT: vand.vx v16, v8, a0
+; CHECK-NEXT: vsll.vi v16, v16, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vsrl.vi v12, v8, 4
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 2
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v12, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i32_unmasked:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-ZVBB-NEXT: vbrev.v v8, v8
-; CHECK-ZVBB-NEXT: ret
- %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
- %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
- ret <vscale x 8 x i32> %v
-}
-
-declare <vscale x 16 x i32> @llvm.vp.bitreverse.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
-
-define <vscale x 16 x i32> @vp_bitreverse_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: vand.vx v24, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v24, v24, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v16, v8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t
-; RV64-NEXT: vor.vv v16, v16, v24, v0.t
-; RV64-NEXT: vand.vx v24, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v24, v24, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: ret
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-ZVBB-NEXT: vbrev.v v8, v8
+; CHECK-ZVBB-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+declare <vscale x 16 x i32> @llvm.vp.bitreverse.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i32> @vp_bitreverse_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_bitreverse_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vsrl.vi v24, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vand.vx v24, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v24, v24, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32:
; CHECK-ZVBB: # %bb.0:
@@ -2099,79 +1418,42 @@ define <vscale x 16 x i32> @vp_bitreverse_nxv16i32(<vscale x 16 x i32> %va, <vsc
}
define <vscale x 16 x i32> @vp_bitreverse_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv16i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsrl.vi v24, v8, 24
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vand.vx v24, v8, a0
-; RV32-NEXT: vsll.vi v24, v24, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v16, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv16i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsrl.vi v24, v8, 24
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vand.vx v24, v8, a0
-; RV64-NEXT: vsll.vi v24, v24, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v16, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vsrl.vi v24, v8, 24
+; CHECK-NEXT: vor.vv v16, v16, v24
+; CHECK-NEXT: vand.vx v24, v8, a0
+; CHECK-NEXT: vsll.vi v24, v24, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 2
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v16, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -3774,179 +3056,92 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64_unmasked(<vscale x 8 x i64> %va
declare <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16>, <vscale x 64 x i1>, i32)
define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv64i16:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 4
-; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; RV32-NEXT: vmv1r.v v24, v0
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: srli a2, a1, 1
-; RV32-NEXT: vsetvli a3, zero, e8, m1, ta, ma
-; RV32-NEXT: vslidedown.vx v0, v0, a2
-; RV32-NEXT: slli a1, a1, 2
-; RV32-NEXT: sub a2, a0, a1
-; RV32-NEXT: sltu a3, a0, a2
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: and a2, a3, a2
-; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
-; RV32-NEXT: vsrl.vi v8, v16, 8, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
-; RV32-NEXT: vor.vv v16, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t
-; RV32-NEXT: lui a2, 1
-; RV32-NEXT: addi a2, a2, -241
-; RV32-NEXT: vand.vx v8, v8, a2, v0.t
-; RV32-NEXT: vand.vx v16, v16, a2, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 4, v0.t
-; RV32-NEXT: vor.vv v16, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v8, v16, 2, v0.t
-; RV32-NEXT: lui a3, 3
-; RV32-NEXT: addi a3, a3, 819
-; RV32-NEXT: vand.vx v8, v8, a3, v0.t
-; RV32-NEXT: vand.vx v16, v16, a3, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 2, v0.t
-; RV32-NEXT: vor.vv v16, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t
-; RV32-NEXT: lui a4, 5
-; RV32-NEXT: addi a4, a4, 1365
-; RV32-NEXT: vand.vx v8, v8, a4, v0.t
-; RV32-NEXT: vand.vx v16, v16, a4, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a5, sp, 16
-; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
-; RV32-NEXT: bltu a0, a1, .LBB46_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a0, a1
-; RV32-NEXT: .LBB46_2:
-; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT: vmv1r.v v0, v24
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vand.vx v16, v16, a2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a2, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vand.vx v16, v16, a3, v0.t
-; RV32-NEXT: vand.vx v8, v8, a3, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vand.vx v16, v16, a4, v0.t
-; RV32-NEXT: vand.vx v8, v8, a4, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v16, v8, v0.t
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv64i16:
-; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
-; RV64-NEXT: sub sp, sp, a1
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; RV64-NEXT: vmv1r.v v24, v0
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: srli a2, a1, 1
-; RV64-NEXT: vsetvli a3, zero, e8, m1, ta, ma
-; RV64-NEXT: vslidedown.vx v0, v0, a2
-; RV64-NEXT: slli a1, a1, 2
-; RV64-NEXT: sub a2, a0, a1
-; RV64-NEXT: sltu a3, a0, a2
-; RV64-NEXT: addi a3, a3, -1
-; RV64-NEXT: and a2, a3, a2
-; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
-; RV64-NEXT: vsrl.vi v8, v16, 8, v0.t
-; RV64-NEXT: vsll.vi v16, v16, 8, v0.t
-; RV64-NEXT: vor.vv v16, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v8, v16, 4, v0.t
-; RV64-NEXT: lui a2, 1
-; RV64-NEXT: addiw a2, a2, -241
-; RV64-NEXT: vand.vx v8, v8, a2, v0.t
-; RV64-NEXT: vand.vx v16, v16, a2, v0.t
-; RV64-NEXT: vsll.vi v16, v16, 4, v0.t
-; RV64-NEXT: vor.vv v16, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v8, v16, 2, v0.t
-; RV64-NEXT: lui a3, 3
-; RV64-NEXT: addiw a3, a3, 819
-; RV64-NEXT: vand.vx v8, v8, a3, v0.t
-; RV64-NEXT: vand.vx v16, v16, a3, v0.t
-; RV64-NEXT: vsll.vi v16, v16, 2, v0.t
-; RV64-NEXT: vor.vv v16, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t
-; RV64-NEXT: lui a4, 5
-; RV64-NEXT: addiw a4, a4, 1365
-; RV64-NEXT: vand.vx v8, v8, a4, v0.t
-; RV64-NEXT: vand.vx v16, v16, a4, v0.t
-; RV64-NEXT: vsll.vi v16, v16, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: addi a5, sp, 16
-; RV64-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
-; RV64-NEXT: bltu a0, a1, .LBB46_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a0, a1
-; RV64-NEXT: .LBB46_2:
-; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT: vmv1r.v v0, v24
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 3
-; RV64-NEXT: add a0, sp, a0
-; RV64-NEXT: addi a0, a0, 16
-; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vand.vx v16, v16, a2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a2, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vand.vx v16, v16, a3, v0.t
-; RV64-NEXT: vand.vx v8, v8, a3, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vand.vx v16, v16, a4, v0.t
-; RV64-NEXT: vand.vx v8, v8, a4, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: addi a0, sp, 16
-; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 4
-; RV64-NEXT: add sp, sp, a0
-; RV64-NEXT: addi sp, sp, 16
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv64i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a2, a1, 1
+; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v16, 8, v0.t
+; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t
+; CHECK-NEXT: vor.vv v16, v16, v8, v0.t
+; CHECK-NEXT: vsrl.vi v8, v16, 4, v0.t
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: addi a2, a2, -241
+; CHECK-NEXT: vand.vx v8, v8, a2, v0.t
+; CHECK-NEXT: vand.vx v16, v16, a2, v0.t
+; CHECK-NEXT: vsll.vi v16, v16, 4, v0.t
+; CHECK-NEXT: vor.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsrl.vi v8, v16, 2, v0.t
+; CHECK-NEXT: lui a3, 3
+; CHECK-NEXT: addi a3, a3, 819
+; CHECK-NEXT: vand.vx v8, v8, a3, v0.t
+; CHECK-NEXT: vand.vx v16, v16, a3, v0.t
+; CHECK-NEXT: vsll.vi v16, v16, 2, v0.t
+; CHECK-NEXT: vor.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsrl.vi v8, v16, 1, v0.t
+; CHECK-NEXT: lui a4, 5
+; CHECK-NEXT: addi a4, a4, 1365
+; CHECK-NEXT: vand.vx v8, v8, a4, v0.t
+; CHECK-NEXT: vand.vx v16, v16, a4, v0.t
+; CHECK-NEXT: vsll.vi v16, v16, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT: addi a5, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
+; CHECK-NEXT: bltu a0, a1, .LBB46_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB46_2:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT: vand.vx v16, v16, a2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a2, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v16, v16, a3, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a3, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT: vand.vx v16, v16, a4, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a4, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16:
; CHECK-ZVBB: # %bb.0:
@@ -3975,121 +3170,63 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vsc
}
define <vscale x 64 x i16> @vp_bitreverse_nxv64i16_unmasked(<vscale x 64 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv64i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 2
-; RV32-NEXT: sub a2, a0, a1
-; RV32-NEXT: sltu a3, a0, a2
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: and a2, a3, a2
-; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
-; RV32-NEXT: vsrl.vi v24, v16, 8
-; RV32-NEXT: vsll.vi v16, v16, 8
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vsrl.vi v24, v16, 4
-; RV32-NEXT: lui a2, 1
-; RV32-NEXT: addi a2, a2, -241
-; RV32-NEXT: vand.vx v24, v24, a2
-; RV32-NEXT: vand.vx v16, v16, a2
-; RV32-NEXT: vsll.vi v16, v16, 4
-; RV32-NEXT: vor.vv v16, v24, v16
-; RV32-NEXT: vsrl.vi v24, v16, 2
-; RV32-NEXT: lui a3, 3
-; RV32-NEXT: addi a3, a3, 819
-; RV32-NEXT: vand.vx v24, v24, a3
-; RV32-NEXT: vand.vx v16, v16, a3
-; RV32-NEXT: vsll.vi v16, v16, 2
-; RV32-NEXT: vor.vv v16, v24, v16
-; RV32-NEXT: vsrl.vi v24, v16, 1
-; RV32-NEXT: lui a4, 5
-; RV32-NEXT: addi a4, a4, 1365
-; RV32-NEXT: vand.vx v24, v24, a4
-; RV32-NEXT: vand.vx v16, v16, a4
-; RV32-NEXT: vadd.vv v16, v16, v16
-; RV32-NEXT: vor.vv v16, v24, v16
-; RV32-NEXT: bltu a0, a1, .LBB47_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a0, a1
-; RV32-NEXT: .LBB47_2:
-; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT: vsrl.vi v24, v8, 8
-; RV32-NEXT: vsll.vi v8, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vi v24, v8, 4
-; RV32-NEXT: vand.vx v24, v24, a2
-; RV32-NEXT: vand.vx v8, v8, a2
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v24, v8, 2
-; RV32-NEXT: vand.vx v24, v24, a3
-; RV32-NEXT: vand.vx v8, v8, a3
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v24, v8, 1
-; RV32-NEXT: vand.vx v24, v24, a4
-; RV32-NEXT: vand.vx v8, v8, a4
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v24, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv64i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 2
-; RV64-NEXT: sub a2, a0, a1
-; RV64-NEXT: sltu a3, a0, a2
-; RV64-NEXT: addi a3, a3, -1
-; RV64-NEXT: and a2, a3, a2
-; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
-; RV64-NEXT: vsrl.vi v24, v16, 8
-; RV64-NEXT: vsll.vi v16, v16, 8
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 4
-; RV64-NEXT: lui a2, 1
-; RV64-NEXT: addiw a2, a2, -241
-; RV64-NEXT: vand.vx v24, v24, a2
-; RV64-NEXT: vand.vx v16, v16, a2
-; RV64-NEXT: vsll.vi v16, v16, 4
-; RV64-NEXT: vor.vv v16, v24, v16
-; RV64-NEXT: vsrl.vi v24, v16, 2
-; RV64-NEXT: lui a3, 3
-; RV64-NEXT: addiw a3, a3, 819
-; RV64-NEXT: vand.vx v24, v24, a3
-; RV64-NEXT: vand.vx v16, v16, a3
-; RV64-NEXT: vsll.vi v16, v16, 2
-; RV64-NEXT: vor.vv v16, v24, v16
-; RV64-NEXT: vsrl.vi v24, v16, 1
-; RV64-NEXT: lui a4, 5
-; RV64-NEXT: addiw a4, a4, 1365
-; RV64-NEXT: vand.vx v24, v24, a4
-; RV64-NEXT: vand.vx v16, v16, a4
-; RV64-NEXT: vadd.vv v16, v16, v16
-; RV64-NEXT: vor.vv v16, v24, v16
-; RV64-NEXT: bltu a0, a1, .LBB47_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a0, a1
-; RV64-NEXT: .LBB47_2:
-; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT: vsrl.vi v24, v8, 8
-; RV64-NEXT: vsll.vi v8, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 4
-; RV64-NEXT: vand.vx v24, v24, a2
-; RV64-NEXT: vand.vx v8, v8, a2
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v24, v8
-; RV64-NEXT: vsrl.vi v24, v8, 2
-; RV64-NEXT: vand.vx v24, v24, a3
-; RV64-NEXT: vand.vx v8, v8, a3
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v24, v8
-; RV64-NEXT: vsrl.vi v24, v8, 1
-; RV64-NEXT: vand.vx v24, v24, a4
-; RV64-NEXT: vand.vx v8, v8, a4
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v24, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv64i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v24, v16, 8
+; CHECK-NEXT: vsll.vi v16, v16, 8
+; CHECK-NEXT: vor.vv v16, v16, v24
+; CHECK-NEXT: vsrl.vi v24, v16, 4
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: addi a2, a2, -241
+; CHECK-NEXT: vand.vx v24, v24, a2
+; CHECK-NEXT: vand.vx v16, v16, a2
+; CHECK-NEXT: vsll.vi v16, v16, 4
+; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: vsrl.vi v24, v16, 2
+; CHECK-NEXT: lui a3, 3
+; CHECK-NEXT: addi a3, a3, 819
+; CHECK-NEXT: vand.vx v24, v24, a3
+; CHECK-NEXT: vand.vx v16, v16, a3
+; CHECK-NEXT: vsll.vi v16, v16, 2
+; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: vsrl.vi v24, v16, 1
+; CHECK-NEXT: lui a4, 5
+; CHECK-NEXT: addi a4, a4, 1365
+; CHECK-NEXT: vand.vx v24, v24, a4
+; CHECK-NEXT: vand.vx v16, v16, a4
+; CHECK-NEXT: vadd.vv v16, v16, v16
+; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: bltu a0, a1, .LBB47_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB47_2:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v24, v8, 8
+; CHECK-NEXT: vsll.vi v8, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v24
+; CHECK-NEXT: vsrl.vi v24, v8, 4
+; CHECK-NEXT: vand.vx v24, v24, a2
+; CHECK-NEXT: vand.vx v8, v8, a2
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v24, v8
+; CHECK-NEXT: vsrl.vi v24, v8, 2
+; CHECK-NEXT: vand.vx v24, v24, a3
+; CHECK-NEXT: vand.vx v8, v8, a3
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v24, v8
+; CHECK-NEXT: vsrl.vi v24, v8, 1
+; CHECK-NEXT: vand.vx v24, v24, a4
+; CHECK-NEXT: vand.vx v8, v8, a4
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v24, v8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -4117,65 +3254,35 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16_unmasked(<vscale x 64 x i16>
; Test promotion.
declare <vscale x 1 x i9> @llvm.vp.bitreverse.nxv1i9(<vscale x 1 x i9>, <vscale x 1 x i1>, i32)
define <vscale x 1 x i9> @vp_bitreverse_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_nxv1i9:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 7, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_nxv1i9:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 7, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_nxv1i9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 7, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i9:
; CHECK-ZVBB: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll
index 884bb206a31ebe2..c55399b502471a3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll
@@ -119,37 +119,21 @@ define <vscale x 32 x i16> @bswap_nxv32i16(<vscale x 32 x i16> %va) {
declare <vscale x 32 x i16> @llvm.bswap.nxv32i16(<vscale x 32 x i16>)
define <vscale x 1 x i32> @bswap_nxv1i32(<vscale x 1 x i32> %va) {
-; RV32-LABEL: bswap_nxv1i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsrl.vi v10, v8, 24
-; RV32-NEXT: vor.vv v9, v9, v10
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsll.vi v10, v10, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: bswap_nxv1i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsrl.vi v10, v8, 24
-; RV64-NEXT: vor.vv v9, v9, v10
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsll.vi v10, v10, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: bswap_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsrl.vi v10, v8, 24
+; CHECK-NEXT: vor.vv v9, v9, v10
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsll.vi v10, v10, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: ret
;
; CHECK-ZVKB-LABEL: bswap_nxv1i32:
; CHECK-ZVKB: # %bb.0:
@@ -162,37 +146,21 @@ define <vscale x 1 x i32> @bswap_nxv1i32(<vscale x 1 x i32> %va) {
declare <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32>)
define <vscale x 2 x i32> @bswap_nxv2i32(<vscale x 2 x i32> %va) {
-; RV32-LABEL: bswap_nxv2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsrl.vi v10, v8, 24
-; RV32-NEXT: vor.vv v9, v9, v10
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsll.vi v10, v10, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: bswap_nxv2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsrl.vi v10, v8, 24
-; RV64-NEXT: vor.vv v9, v9, v10
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsll.vi v10, v10, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: bswap_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsrl.vi v10, v8, 24
+; CHECK-NEXT: vor.vv v9, v9, v10
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsll.vi v10, v10, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: ret
;
; CHECK-ZVKB-LABEL: bswap_nxv2i32:
; CHECK-ZVKB: # %bb.0:
@@ -205,37 +173,21 @@ define <vscale x 2 x i32> @bswap_nxv2i32(<vscale x 2 x i32> %va) {
declare <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32>)
define <vscale x 4 x i32> @bswap_nxv4i32(<vscale x 4 x i32> %va) {
-; RV32-LABEL: bswap_nxv4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsrl.vi v12, v8, 24
-; RV32-NEXT: vor.vv v10, v10, v12
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsll.vi v12, v12, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: bswap_nxv4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsrl.vi v12, v8, 24
-; RV64-NEXT: vor.vv v10, v10, v12
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsll.vi v12, v12, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: bswap_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsrl.vi v12, v8, 24
+; CHECK-NEXT: vor.vv v10, v10, v12
+; CHECK-NEXT: vand.vx v12, v8, a0
+; CHECK-NEXT: vsll.vi v12, v12, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: ret
;
; CHECK-ZVKB-LABEL: bswap_nxv4i32:
; CHECK-ZVKB: # %bb.0:
@@ -248,37 +200,21 @@ define <vscale x 4 x i32> @bswap_nxv4i32(<vscale x 4 x i32> %va) {
declare <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32>)
define <vscale x 8 x i32> @bswap_nxv8i32(<vscale x 8 x i32> %va) {
-; RV32-LABEL: bswap_nxv8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsrl.vi v16, v8, 24
-; RV32-NEXT: vor.vv v12, v12, v16
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsll.vi v16, v16, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: ret
-;
-; RV64-LABEL: bswap_nxv8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsrl.vi v16, v8, 24
-; RV64-NEXT: vor.vv v12, v12, v16
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsll.vi v16, v16, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: ret
+; CHECK-LABEL: bswap_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vsrl.vi v16, v8, 24
+; CHECK-NEXT: vor.vv v12, v12, v16
+; CHECK-NEXT: vand.vx v16, v8, a0
+; CHECK-NEXT: vsll.vi v16, v16, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: ret
;
; CHECK-ZVKB-LABEL: bswap_nxv8i32:
; CHECK-ZVKB: # %bb.0:
@@ -291,37 +227,21 @@ define <vscale x 8 x i32> @bswap_nxv8i32(<vscale x 8 x i32> %va) {
declare <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32>)
define <vscale x 16 x i32> @bswap_nxv16i32(<vscale x 16 x i32> %va) {
-; RV32-LABEL: bswap_nxv16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsrl.vi v24, v8, 24
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vand.vx v24, v8, a0
-; RV32-NEXT: vsll.vi v24, v24, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: bswap_nxv16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsrl.vi v24, v8, 24
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vand.vx v24, v8, a0
-; RV64-NEXT: vsll.vi v24, v24, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: ret
+; CHECK-LABEL: bswap_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vsrl.vi v24, v8, 24
+; CHECK-NEXT: vor.vv v16, v16, v24
+; CHECK-NEXT: vand.vx v24, v8, a0
+; CHECK-NEXT: vsll.vi v24, v24, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: ret
;
; CHECK-ZVKB-LABEL: bswap_nxv16i32:
; CHECK-ZVKB: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
index 3c4ff5e7d1cefc3..6aac13a0bcbb873 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
@@ -251,37 +251,21 @@ define <vscale x 32 x i16> @vp_bswap_nxv32i16_unmasked(<vscale x 32 x i16> %va,
declare <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
define <vscale x 1 x i32> @vp_bswap_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv1i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t
-; RV32-NEXT: vor.vv v9, v9, v10, v0.t
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v10, v10, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bswap_nxv1i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 24, v0.t
-; RV64-NEXT: vor.vv v9, v9, v10, v0.t
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v10, v10, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bswap_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVKB-LABEL: vp_bswap_nxv1i32:
; CHECK-ZVKB: # %bb.0:
@@ -293,37 +277,21 @@ define <vscale x 1 x i32> @vp_bswap_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1
}
define <vscale x 1 x i32> @vp_bswap_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv1i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsrl.vi v10, v8, 24
-; RV32-NEXT: vor.vv v9, v9, v10
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsll.vi v10, v10, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bswap_nxv1i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsrl.vi v10, v8, 24
-; RV64-NEXT: vor.vv v9, v9, v10
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsll.vi v10, v10, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bswap_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsrl.vi v10, v8, 24
+; CHECK-NEXT: vor.vv v9, v9, v10
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsll.vi v10, v10, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: ret
;
; CHECK-ZVKB-LABEL: vp_bswap_nxv1i32_unmasked:
; CHECK-ZVKB: # %bb.0:
@@ -339,37 +307,21 @@ define <vscale x 1 x i32> @vp_bswap_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32
declare <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
define <vscale x 2 x i32> @vp_bswap_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t
-; RV32-NEXT: vor.vv v9, v9, v10, v0.t
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v10, v10, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bswap_nxv2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 24, v0.t
-; RV64-NEXT: vor.vv v9, v9, v10, v0.t
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v10, v10, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bswap_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVKB-LABEL: vp_bswap_nxv2i32:
; CHECK-ZVKB: # %bb.0:
@@ -381,37 +333,21 @@ define <vscale x 2 x i32> @vp_bswap_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2
}
define <vscale x 2 x i32> @vp_bswap_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv2i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsrl.vi v10, v8, 24
-; RV32-NEXT: vor.vv v9, v9, v10
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsll.vi v10, v10, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bswap_nxv2i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsrl.vi v10, v8, 24
-; RV64-NEXT: vor.vv v9, v9, v10
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsll.vi v10, v10, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bswap_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsrl.vi v10, v8, 24
+; CHECK-NEXT: vor.vv v9, v9, v10
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsll.vi v10, v10, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: ret
;
; CHECK-ZVKB-LABEL: vp_bswap_nxv2i32_unmasked:
; CHECK-ZVKB: # %bb.0:
@@ -427,37 +363,21 @@ define <vscale x 2 x i32> @vp_bswap_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32
declare <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
define <vscale x 4 x i32> @vp_bswap_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t
-; RV32-NEXT: vor.vv v10, v10, v12, v0.t
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v12, v12, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bswap_nxv4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 24, v0.t
-; RV64-NEXT: vor.vv v10, v10, v12, v0.t
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v12, v12, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bswap_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v10, v10, v12, v0.t
+; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v12, v12, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVKB-LABEL: vp_bswap_nxv4i32:
; CHECK-ZVKB: # %bb.0:
@@ -469,37 +389,21 @@ define <vscale x 4 x i32> @vp_bswap_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4
}
define <vscale x 4 x i32> @vp_bswap_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv4i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsrl.vi v12, v8, 24
-; RV32-NEXT: vor.vv v10, v10, v12
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsll.vi v12, v12, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bswap_nxv4i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsrl.vi v12, v8, 24
-; RV64-NEXT: vor.vv v10, v10, v12
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsll.vi v12, v12, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bswap_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsrl.vi v12, v8, 24
+; CHECK-NEXT: vor.vv v10, v10, v12
+; CHECK-NEXT: vand.vx v12, v8, a0
+; CHECK-NEXT: vsll.vi v12, v12, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: ret
;
; CHECK-ZVKB-LABEL: vp_bswap_nxv4i32_unmasked:
; CHECK-ZVKB: # %bb.0:
@@ -515,37 +419,21 @@ define <vscale x 4 x i32> @vp_bswap_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32
declare <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
define <vscale x 8 x i32> @vp_bswap_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t
-; RV32-NEXT: vor.vv v12, v12, v16, v0.t
-; RV32-NEXT: vand.vx v16, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bswap_nxv8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t
-; RV64-NEXT: vor.vv v12, v12, v16, v0.t
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v16, v16, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bswap_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v12, v12, v16, v0.t
+; CHECK-NEXT: vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVKB-LABEL: vp_bswap_nxv8i32:
; CHECK-ZVKB: # %bb.0:
@@ -557,37 +445,21 @@ define <vscale x 8 x i32> @vp_bswap_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8
}
define <vscale x 8 x i32> @vp_bswap_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv8i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsrl.vi v16, v8, 24
-; RV32-NEXT: vor.vv v12, v12, v16
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsll.vi v16, v16, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bswap_nxv8i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsrl.vi v16, v8, 24
-; RV64-NEXT: vor.vv v12, v12, v16
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsll.vi v16, v16, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bswap_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vsrl.vi v16, v8, 24
+; CHECK-NEXT: vor.vv v12, v12, v16
+; CHECK-NEXT: vand.vx v16, v8, a0
+; CHECK-NEXT: vsll.vi v16, v16, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: ret
;
; CHECK-ZVKB-LABEL: vp_bswap_nxv8i32_unmasked:
; CHECK-ZVKB: # %bb.0:
@@ -603,37 +475,21 @@ define <vscale x 8 x i32> @vp_bswap_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32
declare <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
define <vscale x 16 x i32> @vp_bswap_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: vand.vx v24, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v24, v24, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bswap_nxv16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t
-; RV64-NEXT: vor.vv v16, v16, v24, v0.t
-; RV64-NEXT: vand.vx v24, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v24, v24, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bswap_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vsrl.vi v24, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vand.vx v24, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v24, v24, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVKB-LABEL: vp_bswap_nxv16i32:
; CHECK-ZVKB: # %bb.0:
@@ -645,37 +501,21 @@ define <vscale x 16 x i32> @vp_bswap_nxv16i32(<vscale x 16 x i32> %va, <vscale x
}
define <vscale x 16 x i32> @vp_bswap_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_nxv16i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsrl.vi v24, v8, 24
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vand.vx v24, v8, a0
-; RV32-NEXT: vsll.vi v24, v24, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bswap_nxv16i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsrl.vi v24, v8, 24
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vand.vx v24, v8, a0
-; RV64-NEXT: vsll.vi v24, v24, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bswap_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vsrl.vi v24, v8, 24
+; CHECK-NEXT: vor.vv v16, v16, v24
+; CHECK-NEXT: vand.vx v24, v8, a0
+; CHECK-NEXT: vsll.vi v24, v24, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: ret
;
; CHECK-ZVKB-LABEL: vp_bswap_nxv16i32_unmasked:
; CHECK-ZVKB: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll
index a128adc8de619ce..46bedcd4e966628 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll
@@ -173,21 +173,13 @@ define <vscale x 2 x i64> @vselect_add_const_nxv2i64(<vscale x 2 x i64> %a0) {
}
define <2 x i16> @vselect_add_const_signbit_v2i16(<2 x i16> %a0) {
-; RV32-LABEL: vselect_add_const_signbit_v2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 8
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT: vssubu.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vselect_add_const_signbit_v2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 8
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT: vssubu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vselect_add_const_signbit_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
%cmp = icmp ugt <2 x i16> %a0, <i16 32766, i16 32766>
%v1 = add <2 x i16> %a0, <i16 -32767, i16 -32767>
%v2 = select <2 x i1> %cmp, <2 x i16> %v1, <2 x i16> zeroinitializer
@@ -195,21 +187,13 @@ define <2 x i16> @vselect_add_const_signbit_v2i16(<2 x i16> %a0) {
}
define <vscale x 2 x i16> @vselect_add_const_signbit_nxv2i16(<vscale x 2 x i16> %a0) {
-; RV32-LABEL: vselect_add_const_signbit_nxv2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 8
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; RV32-NEXT: vssubu.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vselect_add_const_signbit_nxv2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 8
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; RV64-NEXT: vssubu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vselect_add_const_signbit_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
%cm1 = insertelement <vscale x 2 x i16> poison, i16 32766, i32 0
%splatcm1 = shufflevector <vscale x 2 x i16> %cm1, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
%nc = insertelement <vscale x 2 x i16> poison, i16 -32767, i32 0
@@ -318,3 +302,6 @@ declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>)
declare <vscale x 2 x i64> @llvm.umin.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 2 x i64> @llvm.umax.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
index c1a8c657b4ff86b..d78d67d5e359871 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
@@ -394,71 +394,38 @@ define <vscale x 64 x i8> @ctlz_nxv64i8(<vscale x 64 x i8> %va) {
declare <vscale x 64 x i8> @llvm.ctlz.nxv64i8(<vscale x 64 x i8>, i1)
define <vscale x 1 x i16> @ctlz_nxv1i16(<vscale x 1 x i16> %va) {
-; RV32I-LABEL: ctlz_nxv1i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_nxv1i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv1i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_nxv1i16:
; CHECK-F: # %bb.0:
@@ -493,71 +460,38 @@ define <vscale x 1 x i16> @ctlz_nxv1i16(<vscale x 1 x i16> %va) {
declare <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16>, i1)
define <vscale x 2 x i16> @ctlz_nxv2i16(<vscale x 2 x i16> %va) {
-; RV32I-LABEL: ctlz_nxv2i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_nxv2i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv2i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_nxv2i16:
; CHECK-F: # %bb.0:
@@ -592,71 +526,38 @@ define <vscale x 2 x i16> @ctlz_nxv2i16(<vscale x 2 x i16> %va) {
declare <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16>, i1)
define <vscale x 4 x i16> @ctlz_nxv4i16(<vscale x 4 x i16> %va) {
-; RV32I-LABEL: ctlz_nxv4i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_nxv4i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv4i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_nxv4i16:
; CHECK-F: # %bb.0:
@@ -691,71 +592,38 @@ define <vscale x 4 x i16> @ctlz_nxv4i16(<vscale x 4 x i16> %va) {
declare <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16>, i1)
define <vscale x 8 x i16> @ctlz_nxv8i16(<vscale x 8 x i16> %va) {
-; RV32I-LABEL: ctlz_nxv8i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; RV32I-NEXT: vsrl.vi v10, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v10, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v10, v10, a0
-; RV32I-NEXT: vsub.vv v8, v8, v10
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v10, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v10, v8
-; RV32I-NEXT: vsrl.vi v10, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v10
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_nxv8i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; RV64I-NEXT: vsrl.vi v10, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v10, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v10, v10, a0
-; RV64I-NEXT: vsub.vv v8, v8, v10
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v10, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v10, v8
-; RV64I-NEXT: vsrl.vi v10, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v10
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv8i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_nxv8i16:
; CHECK-F: # %bb.0:
@@ -790,71 +658,38 @@ define <vscale x 8 x i16> @ctlz_nxv8i16(<vscale x 8 x i16> %va) {
declare <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16>, i1)
define <vscale x 16 x i16> @ctlz_nxv16i16(<vscale x 16 x i16> %va) {
-; RV32I-LABEL: ctlz_nxv16i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; RV32I-NEXT: vsrl.vi v12, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v12
-; RV32I-NEXT: vsrl.vi v12, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v12
-; RV32I-NEXT: vsrl.vi v12, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v12
-; RV32I-NEXT: vsrl.vi v12, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v12
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v12, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v12, v12, a0
-; RV32I-NEXT: vsub.vv v8, v8, v12
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v12, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v12, v8
-; RV32I-NEXT: vsrl.vi v12, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v12
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_nxv16i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; RV64I-NEXT: vsrl.vi v12, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v12
-; RV64I-NEXT: vsrl.vi v12, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v12
-; RV64I-NEXT: vsrl.vi v12, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v12
-; RV64I-NEXT: vsrl.vi v12, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v12
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v12, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v12, v12, a0
-; RV64I-NEXT: vsub.vv v8, v8, v12
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v12, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v12, v8
-; RV64I-NEXT: vsrl.vi v12, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v12
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv16i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v12, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_nxv16i16:
; CHECK-F: # %bb.0:
@@ -889,71 +724,38 @@ define <vscale x 16 x i16> @ctlz_nxv16i16(<vscale x 16 x i16> %va) {
declare <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16>, i1)
define <vscale x 32 x i16> @ctlz_nxv32i16(<vscale x 32 x i16> %va) {
-; RV32-LABEL: ctlz_nxv32i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: ctlz_nxv32i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: ctlz_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: ctlz_nxv32i16:
; CHECK-ZVBB: # %bb.0:
@@ -966,77 +768,41 @@ define <vscale x 32 x i16> @ctlz_nxv32i16(<vscale x 32 x i16> %va) {
declare <vscale x 32 x i16> @llvm.ctlz.nxv32i16(<vscale x 32 x i16>, i1)
define <vscale x 1 x i32> @ctlz_nxv1i32(<vscale x 1 x i32> %va) {
-; RV32I-LABEL: ctlz_nxv1i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 16
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_nxv1i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 16
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv1i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 16
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_nxv1i32:
; CHECK-F: # %bb.0:
@@ -1074,77 +840,41 @@ define <vscale x 1 x i32> @ctlz_nxv1i32(<vscale x 1 x i32> %va) {
declare <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32>, i1)
define <vscale x 2 x i32> @ctlz_nxv2i32(<vscale x 2 x i32> %va) {
-; RV32I-LABEL: ctlz_nxv2i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 16
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_nxv2i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 16
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv2i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 16
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_nxv2i32:
; CHECK-F: # %bb.0:
@@ -1182,77 +912,41 @@ define <vscale x 2 x i32> @ctlz_nxv2i32(<vscale x 2 x i32> %va) {
declare <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32>, i1)
define <vscale x 4 x i32> @ctlz_nxv4i32(<vscale x 4 x i32> %va) {
-; RV32I-LABEL: ctlz_nxv4i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32I-NEXT: vsrl.vi v10, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 16
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v10, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v10, v10, a0
-; RV32I-NEXT: vsub.vv v8, v8, v10
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v10, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v10, v8
-; RV32I-NEXT: vsrl.vi v10, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v10
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_nxv4i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64I-NEXT: vsrl.vi v10, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 16
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v10, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v10, v10, a0
-; RV64I-NEXT: vsub.vv v8, v8, v10
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v10, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v10, v8
-; RV64I-NEXT: vsrl.vi v10, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v10
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv4i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 16
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_nxv4i32:
; CHECK-F: # %bb.0:
@@ -1290,77 +984,41 @@ define <vscale x 4 x i32> @ctlz_nxv4i32(<vscale x 4 x i32> %va) {
declare <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32>, i1)
define <vscale x 8 x i32> @ctlz_nxv8i32(<vscale x 8 x i32> %va) {
-; RV32I-LABEL: ctlz_nxv8i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; RV32I-NEXT: vsrl.vi v12, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v12
-; RV32I-NEXT: vsrl.vi v12, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v12
-; RV32I-NEXT: vsrl.vi v12, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v12
-; RV32I-NEXT: vsrl.vi v12, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v12
-; RV32I-NEXT: vsrl.vi v12, v8, 16
-; RV32I-NEXT: vor.vv v8, v8, v12
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v12, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v12, v12, a0
-; RV32I-NEXT: vsub.vv v8, v8, v12
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v12, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v12, v8
-; RV32I-NEXT: vsrl.vi v12, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v12
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_nxv8i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; RV64I-NEXT: vsrl.vi v12, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v12
-; RV64I-NEXT: vsrl.vi v12, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v12
-; RV64I-NEXT: vsrl.vi v12, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v12
-; RV64I-NEXT: vsrl.vi v12, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v12
-; RV64I-NEXT: vsrl.vi v12, v8, 16
-; RV64I-NEXT: vor.vv v8, v8, v12
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v12, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v12, v12, a0
-; RV64I-NEXT: vsub.vv v8, v8, v12
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v12, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v12, v8
-; RV64I-NEXT: vsrl.vi v12, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v12
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv8i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 16
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v12, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_nxv8i32:
; CHECK-F: # %bb.0:
@@ -1398,77 +1056,41 @@ define <vscale x 8 x i32> @ctlz_nxv8i32(<vscale x 8 x i32> %va) {
declare <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32>, i1)
define <vscale x 16 x i32> @ctlz_nxv16i32(<vscale x 16 x i32> %va) {
-; RV32I-LABEL: ctlz_nxv16i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; RV32I-NEXT: vsrl.vi v16, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v16
-; RV32I-NEXT: vsrl.vi v16, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v16
-; RV32I-NEXT: vsrl.vi v16, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v16
-; RV32I-NEXT: vsrl.vi v16, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v16
-; RV32I-NEXT: vsrl.vi v16, v8, 16
-; RV32I-NEXT: vor.vv v8, v8, v16
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v16, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v16, v16, a0
-; RV32I-NEXT: vsub.vv v8, v8, v16
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v16, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v16, v8
-; RV32I-NEXT: vsrl.vi v16, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v16
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_nxv16i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; RV64I-NEXT: vsrl.vi v16, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v16
-; RV64I-NEXT: vsrl.vi v16, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v16
-; RV64I-NEXT: vsrl.vi v16, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v16
-; RV64I-NEXT: vsrl.vi v16, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v16
-; RV64I-NEXT: vsrl.vi v16, v8, 16
-; RV64I-NEXT: vor.vv v8, v8, v16
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v16, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v16, v16, a0
-; RV64I-NEXT: vsub.vv v8, v8, v16
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v16, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v16, v8
-; RV64I-NEXT: vsrl.vi v16, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v16
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv16i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 16
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v16, v16, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v16, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v16, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_nxv16i32:
; CHECK-F: # %bb.0:
@@ -2425,71 +2047,38 @@ define <vscale x 64 x i8> @ctlz_zero_undef_nxv64i8(<vscale x 64 x i8> %va) {
}
define <vscale x 1 x i16> @ctlz_zero_undef_nxv1i16(<vscale x 1 x i16> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv1i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv1i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv1i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_zero_undef_nxv1i16:
; CHECK-F: # %bb.0:
@@ -2519,71 +2108,38 @@ define <vscale x 1 x i16> @ctlz_zero_undef_nxv1i16(<vscale x 1 x i16> %va) {
}
define <vscale x 2 x i16> @ctlz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv2i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv2i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv2i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_zero_undef_nxv2i16:
; CHECK-F: # %bb.0:
@@ -2613,71 +2169,38 @@ define <vscale x 2 x i16> @ctlz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) {
}
define <vscale x 4 x i16> @ctlz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv4i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv4i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv4i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_zero_undef_nxv4i16:
; CHECK-F: # %bb.0:
@@ -2707,71 +2230,38 @@ define <vscale x 4 x i16> @ctlz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) {
}
define <vscale x 8 x i16> @ctlz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv8i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; RV32I-NEXT: vsrl.vi v10, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v10, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v10, v10, a0
-; RV32I-NEXT: vsub.vv v8, v8, v10
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v10, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v10, v8
-; RV32I-NEXT: vsrl.vi v10, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v10
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv8i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; RV64I-NEXT: vsrl.vi v10, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v10, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v10, v10, a0
-; RV64I-NEXT: vsub.vv v8, v8, v10
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v10, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v10, v8
-; RV64I-NEXT: vsrl.vi v10, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v10
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv8i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_zero_undef_nxv8i16:
; CHECK-F: # %bb.0:
@@ -2801,71 +2291,38 @@ define <vscale x 8 x i16> @ctlz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) {
}
define <vscale x 16 x i16> @ctlz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv16i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; RV32I-NEXT: vsrl.vi v12, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v12
-; RV32I-NEXT: vsrl.vi v12, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v12
-; RV32I-NEXT: vsrl.vi v12, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v12
-; RV32I-NEXT: vsrl.vi v12, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v12
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v12, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v12, v12, a0
-; RV32I-NEXT: vsub.vv v8, v8, v12
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v12, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v12, v8
-; RV32I-NEXT: vsrl.vi v12, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v12
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv16i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; RV64I-NEXT: vsrl.vi v12, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v12
-; RV64I-NEXT: vsrl.vi v12, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v12
-; RV64I-NEXT: vsrl.vi v12, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v12
-; RV64I-NEXT: vsrl.vi v12, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v12
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v12, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v12, v12, a0
-; RV64I-NEXT: vsub.vv v8, v8, v12
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v12, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v12, v8
-; RV64I-NEXT: vsrl.vi v12, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v12
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv16i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v12, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_zero_undef_nxv16i16:
; CHECK-F: # %bb.0:
@@ -2895,153 +2352,84 @@ define <vscale x 16 x i16> @ctlz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) {
}
define <vscale x 32 x i16> @ctlz_zero_undef_nxv32i16(<vscale x 32 x i16> %va) {
-; RV32-LABEL: ctlz_zero_undef_nxv32i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: ctlz_zero_undef_nxv32i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: ctlz_zero_undef_nxv32i16:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-ZVBB-NEXT: vclz.v v8, v8
+; CHECK-LABEL: ctlz_zero_undef_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: ctlz_zero_undef_nxv32i16:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT: vclz.v v8, v8
; CHECK-ZVBB-NEXT: ret
%a = call <vscale x 32 x i16> @llvm.ctlz.nxv32i16(<vscale x 32 x i16> %va, i1 true)
ret <vscale x 32 x i16> %a
}
define <vscale x 1 x i32> @ctlz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv1i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 16
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv1i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 16
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv1i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 16
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_zero_undef_nxv1i32:
; CHECK-F: # %bb.0:
@@ -3074,77 +2462,41 @@ define <vscale x 1 x i32> @ctlz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
}
define <vscale x 2 x i32> @ctlz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv2i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 16
-; RV32I-NEXT: vor.vv v8, v8, v9
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv2i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 16
-; RV64I-NEXT: vor.vv v8, v8, v9
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv2i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 16
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_zero_undef_nxv2i32:
; CHECK-F: # %bb.0:
@@ -3177,77 +2529,41 @@ define <vscale x 2 x i32> @ctlz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
}
define <vscale x 4 x i32> @ctlz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv4i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32I-NEXT: vsrl.vi v10, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 16
-; RV32I-NEXT: vor.vv v8, v8, v10
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v10, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v10, v10, a0
-; RV32I-NEXT: vsub.vv v8, v8, v10
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v10, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v10, v8
-; RV32I-NEXT: vsrl.vi v10, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v10
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv4i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64I-NEXT: vsrl.vi v10, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 16
-; RV64I-NEXT: vor.vv v8, v8, v10
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v10, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v10, v10, a0
-; RV64I-NEXT: vsub.vv v8, v8, v10
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v10, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v10, v8
-; RV64I-NEXT: vsrl.vi v10, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v10
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv4i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 16
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_zero_undef_nxv4i32:
; CHECK-F: # %bb.0:
@@ -3280,77 +2596,41 @@ define <vscale x 4 x i32> @ctlz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
}
define <vscale x 8 x i32> @ctlz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv8i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; RV32I-NEXT: vsrl.vi v12, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v12
-; RV32I-NEXT: vsrl.vi v12, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v12
-; RV32I-NEXT: vsrl.vi v12, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v12
-; RV32I-NEXT: vsrl.vi v12, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v12
-; RV32I-NEXT: vsrl.vi v12, v8, 16
-; RV32I-NEXT: vor.vv v8, v8, v12
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v12, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v12, v12, a0
-; RV32I-NEXT: vsub.vv v8, v8, v12
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v12, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v12, v8
-; RV32I-NEXT: vsrl.vi v12, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v12
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv8i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; RV64I-NEXT: vsrl.vi v12, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v12
-; RV64I-NEXT: vsrl.vi v12, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v12
-; RV64I-NEXT: vsrl.vi v12, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v12
-; RV64I-NEXT: vsrl.vi v12, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v12
-; RV64I-NEXT: vsrl.vi v12, v8, 16
-; RV64I-NEXT: vor.vv v8, v8, v12
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v12, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v12, v12, a0
-; RV64I-NEXT: vsub.vv v8, v8, v12
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v12, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v12, v8
-; RV64I-NEXT: vsrl.vi v12, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v12
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv8i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 16
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v12, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_zero_undef_nxv8i32:
; CHECK-F: # %bb.0:
@@ -3383,77 +2663,41 @@ define <vscale x 8 x i32> @ctlz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
}
define <vscale x 16 x i32> @ctlz_zero_undef_nxv16i32(<vscale x 16 x i32> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv16i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; RV32I-NEXT: vsrl.vi v16, v8, 1
-; RV32I-NEXT: vor.vv v8, v8, v16
-; RV32I-NEXT: vsrl.vi v16, v8, 2
-; RV32I-NEXT: vor.vv v8, v8, v16
-; RV32I-NEXT: vsrl.vi v16, v8, 4
-; RV32I-NEXT: vor.vv v8, v8, v16
-; RV32I-NEXT: vsrl.vi v16, v8, 8
-; RV32I-NEXT: vor.vv v8, v8, v16
-; RV32I-NEXT: vsrl.vi v16, v8, 16
-; RV32I-NEXT: vor.vv v8, v8, v16
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vsrl.vi v16, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v16, v16, a0
-; RV32I-NEXT: vsub.vv v8, v8, v16
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v16, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v16, v8
-; RV32I-NEXT: vsrl.vi v16, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v16
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv16i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; RV64I-NEXT: vsrl.vi v16, v8, 1
-; RV64I-NEXT: vor.vv v8, v8, v16
-; RV64I-NEXT: vsrl.vi v16, v8, 2
-; RV64I-NEXT: vor.vv v8, v8, v16
-; RV64I-NEXT: vsrl.vi v16, v8, 4
-; RV64I-NEXT: vor.vv v8, v8, v16
-; RV64I-NEXT: vsrl.vi v16, v8, 8
-; RV64I-NEXT: vor.vv v8, v8, v16
-; RV64I-NEXT: vsrl.vi v16, v8, 16
-; RV64I-NEXT: vor.vv v8, v8, v16
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vsrl.vi v16, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v16, v16, a0
-; RV64I-NEXT: vsub.vv v8, v8, v16
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v16, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v16, v8
-; RV64I-NEXT: vsrl.vi v16, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v16
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv16i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 1
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 2
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 4
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 8
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 16
+; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v16, v16, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v16, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v16, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: ctlz_zero_undef_nxv16i32:
; CHECK-F: # %bb.0:
@@ -4025,3 +3269,6 @@ define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
%a = call <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64> %va, i1 true)
ret <vscale x 8 x i64> %a
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
index bb0ff1c2bf09b0a..f479937ba2fc727 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
@@ -675,71 +675,38 @@ define <vscale x 16 x i16> @vp_ctlz_nxv16i16_unmasked(<vscale x 16 x i16> %va, i
declare <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
define <vscale x 32 x i16> @vp_ctlz_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv32i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv32i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv32i16:
; CHECK-ZVBB: # %bb.0:
@@ -751,71 +718,38 @@ define <vscale x 32 x i16> @vp_ctlz_nxv32i16(<vscale x 32 x i16> %va, <vscale x
}
define <vscale x 32 x i16> @vp_ctlz_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv32i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv32i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv32i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -2071,71 +2005,38 @@ define <vscale x 16 x i16> @vp_ctlz_zero_undef_nxv16i16_unmasked(<vscale x 16 x
define <vscale x 32 x i16> @vp_ctlz_zero_undef_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv32i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv32i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv32i16:
; CHECK-ZVBB: # %bb.0:
@@ -2147,71 +2048,38 @@ define <vscale x 32 x i16> @vp_ctlz_zero_undef_nxv32i16(<vscale x 32 x i16> %va,
}
define <vscale x 32 x i16> @vp_ctlz_zero_undef_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv32i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv32i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv32i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -2862,3 +2730,6 @@ define <vscale x 1 x i9> @vp_ctlz_zero_undef_nxv1i9(<vscale x 1 x i9> %va, <vsca
%v = call <vscale x 1 x i9> @llvm.vp.ctlz.nxv1i9(<vscale x 1 x i9> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i9> %v
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll
index ef0a293ad5fb9ca..1a2b2640ca4fe4c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll
@@ -201,53 +201,29 @@ define <vscale x 64 x i8> @ctpop_nxv64i8(<vscale x 64 x i8> %va) {
declare <vscale x 64 x i8> @llvm.ctpop.nxv64i8(<vscale x 64 x i8>)
define <vscale x 1 x i16> @ctpop_nxv1i16(<vscale x 1 x i16> %va) {
-; RV32-LABEL: ctpop_nxv1i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: ctpop_nxv1i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: ctpop_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv1i16:
; CHECK-ZVBB: # %bb.0:
@@ -260,53 +236,29 @@ define <vscale x 1 x i16> @ctpop_nxv1i16(<vscale x 1 x i16> %va) {
declare <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16>)
define <vscale x 2 x i16> @ctpop_nxv2i16(<vscale x 2 x i16> %va) {
-; RV32-LABEL: ctpop_nxv2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: ctpop_nxv2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: ctpop_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv2i16:
; CHECK-ZVBB: # %bb.0:
@@ -319,53 +271,29 @@ define <vscale x 2 x i16> @ctpop_nxv2i16(<vscale x 2 x i16> %va) {
declare <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16>)
define <vscale x 4 x i16> @ctpop_nxv4i16(<vscale x 4 x i16> %va) {
-; RV32-LABEL: ctpop_nxv4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: ctpop_nxv4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: ctpop_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv4i16:
; CHECK-ZVBB: # %bb.0:
@@ -378,53 +306,29 @@ define <vscale x 4 x i16> @ctpop_nxv4i16(<vscale x 4 x i16> %va) {
declare <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16>)
define <vscale x 8 x i16> @ctpop_nxv8i16(<vscale x 8 x i16> %va) {
-; RV32-LABEL: ctpop_nxv8i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: ctpop_nxv8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: ctpop_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv8i16:
; CHECK-ZVBB: # %bb.0:
@@ -437,53 +341,29 @@ define <vscale x 8 x i16> @ctpop_nxv8i16(<vscale x 8 x i16> %va) {
declare <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16>)
define <vscale x 16 x i16> @ctpop_nxv16i16(<vscale x 16 x i16> %va) {
-; RV32-LABEL: ctpop_nxv16i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: ctpop_nxv16i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: ctpop_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vsub.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv16i16:
; CHECK-ZVBB: # %bb.0:
@@ -496,53 +376,29 @@ define <vscale x 16 x i16> @ctpop_nxv16i16(<vscale x 16 x i16> %va) {
declare <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16>)
define <vscale x 32 x i16> @ctpop_nxv32i16(<vscale x 32 x i16> %va) {
-; RV32-LABEL: ctpop_nxv32i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: ctpop_nxv32i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: ctpop_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv32i16:
; CHECK-ZVBB: # %bb.0:
@@ -555,55 +411,30 @@ define <vscale x 32 x i16> @ctpop_nxv32i16(<vscale x 32 x i16> %va) {
declare <vscale x 32 x i16> @llvm.ctpop.nxv32i16(<vscale x 32 x i16>)
define <vscale x 1 x i32> @ctpop_nxv1i32(<vscale x 1 x i32> %va) {
-; RV32-LABEL: ctpop_nxv1i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: ctpop_nxv1i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: ctpop_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv1i32:
; CHECK-ZVBB: # %bb.0:
@@ -616,55 +447,30 @@ define <vscale x 1 x i32> @ctpop_nxv1i32(<vscale x 1 x i32> %va) {
declare <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32>)
define <vscale x 2 x i32> @ctpop_nxv2i32(<vscale x 2 x i32> %va) {
-; RV32-LABEL: ctpop_nxv2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: ctpop_nxv2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: ctpop_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv2i32:
; CHECK-ZVBB: # %bb.0:
@@ -677,55 +483,30 @@ define <vscale x 2 x i32> @ctpop_nxv2i32(<vscale x 2 x i32> %va) {
declare <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32>)
define <vscale x 4 x i32> @ctpop_nxv4i32(<vscale x 4 x i32> %va) {
-; RV32-LABEL: ctpop_nxv4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: ctpop_nxv4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: ctpop_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv4i32:
; CHECK-ZVBB: # %bb.0:
@@ -738,55 +519,30 @@ define <vscale x 4 x i32> @ctpop_nxv4i32(<vscale x 4 x i32> %va) {
declare <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32>)
define <vscale x 8 x i32> @ctpop_nxv8i32(<vscale x 8 x i32> %va) {
-; RV32-LABEL: ctpop_nxv8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: ctpop_nxv8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: ctpop_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vsub.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv8i32:
; CHECK-ZVBB: # %bb.0:
@@ -799,55 +555,30 @@ define <vscale x 8 x i32> @ctpop_nxv8i32(<vscale x 8 x i32> %va) {
declare <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32>)
define <vscale x 16 x i32> @ctpop_nxv16i32(<vscale x 16 x i32> %va) {
-; RV32-LABEL: ctpop_nxv16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: ctpop_nxv16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: ctpop_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv16i32:
; CHECK-ZVBB: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
index 1c003a33c54bf8b..9767ba4bbc3b7a6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
@@ -417,53 +417,29 @@ define <vscale x 64 x i8> @vp_ctpop_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32
declare <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
define <vscale x 1 x i16> @vp_ctpop_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv1i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv1i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i16:
; CHECK-ZVBB: # %bb.0:
@@ -475,53 +451,29 @@ define <vscale x 1 x i16> @vp_ctpop_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1
}
define <vscale x 1 x i16> @vp_ctpop_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv1i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv1i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -537,53 +489,29 @@ define <vscale x 1 x i16> @vp_ctpop_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32
declare <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
define <vscale x 2 x i16> @vp_ctpop_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i16:
; CHECK-ZVBB: # %bb.0:
@@ -595,53 +523,29 @@ define <vscale x 2 x i16> @vp_ctpop_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2
}
define <vscale x 2 x i16> @vp_ctpop_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv2i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv2i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -657,53 +561,29 @@ define <vscale x 2 x i16> @vp_ctpop_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32
declare <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
define <vscale x 4 x i16> @vp_ctpop_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i16:
; CHECK-ZVBB: # %bb.0:
@@ -715,53 +595,29 @@ define <vscale x 4 x i16> @vp_ctpop_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4
}
define <vscale x 4 x i16> @vp_ctpop_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv4i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv4i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -777,53 +633,29 @@ define <vscale x 4 x i16> @vp_ctpop_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32
declare <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
define <vscale x 8 x i16> @vp_ctpop_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv8i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i16:
; CHECK-ZVBB: # %bb.0:
@@ -835,53 +667,29 @@ define <vscale x 8 x i16> @vp_ctpop_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8
}
define <vscale x 8 x i16> @vp_ctpop_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv8i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv8i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -897,53 +705,29 @@ define <vscale x 8 x i16> @vp_ctpop_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32
declare <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
define <vscale x 16 x i16> @vp_ctpop_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv16i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv16i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i16:
; CHECK-ZVBB: # %bb.0:
@@ -955,53 +739,29 @@ define <vscale x 16 x i16> @vp_ctpop_nxv16i16(<vscale x 16 x i16> %va, <vscale x
}
define <vscale x 16 x i16> @vp_ctpop_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv16i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv16i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vsub.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1017,53 +777,29 @@ define <vscale x 16 x i16> @vp_ctpop_nxv16i16_unmasked(<vscale x 16 x i16> %va,
declare <vscale x 32 x i16> @llvm.vp.ctpop.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i1>, i32)
define <vscale x 32 x i16> @vp_ctpop_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv32i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv32i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i16:
; CHECK-ZVBB: # %bb.0:
@@ -1075,53 +811,29 @@ define <vscale x 32 x i16> @vp_ctpop_nxv32i16(<vscale x 32 x i16> %va, <vscale x
}
define <vscale x 32 x i16> @vp_ctpop_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv32i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv32i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1137,55 +849,30 @@ define <vscale x 32 x i16> @vp_ctpop_nxv32i16_unmasked(<vscale x 32 x i16> %va,
declare <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
define <vscale x 1 x i32> @vp_ctpop_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv1i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv1i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i32:
; CHECK-ZVBB: # %bb.0:
@@ -1197,55 +884,30 @@ define <vscale x 1 x i32> @vp_ctpop_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1
}
define <vscale x 1 x i32> @vp_ctpop_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv1i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv1i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1261,55 +923,30 @@ define <vscale x 1 x i32> @vp_ctpop_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32
declare <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
define <vscale x 2 x i32> @vp_ctpop_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i32:
; CHECK-ZVBB: # %bb.0:
@@ -1321,55 +958,30 @@ define <vscale x 2 x i32> @vp_ctpop_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2
}
define <vscale x 2 x i32> @vp_ctpop_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv2i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv2i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1385,55 +997,30 @@ define <vscale x 2 x i32> @vp_ctpop_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32
declare <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
define <vscale x 4 x i32> @vp_ctpop_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i32:
; CHECK-ZVBB: # %bb.0:
@@ -1445,55 +1032,30 @@ define <vscale x 4 x i32> @vp_ctpop_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4
}
define <vscale x 4 x i32> @vp_ctpop_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv4i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv4i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1509,55 +1071,30 @@ define <vscale x 4 x i32> @vp_ctpop_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32
declare <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
define <vscale x 8 x i32> @vp_ctpop_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i32:
; CHECK-ZVBB: # %bb.0:
@@ -1569,55 +1106,30 @@ define <vscale x 8 x i32> @vp_ctpop_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8
}
define <vscale x 8 x i32> @vp_ctpop_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv8i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv8i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vsub.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1633,55 +1145,30 @@ define <vscale x 8 x i32> @vp_ctpop_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32
declare <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
define <vscale x 16 x i32> @vp_ctpop_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i32:
; CHECK-ZVBB: # %bb.0:
@@ -1693,55 +1180,30 @@ define <vscale x 16 x i32> @vp_ctpop_nxv16i32(<vscale x 16 x i32> %va, <vscale x
}
define <vscale x 16 x i32> @vp_ctpop_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv16i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv16i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -3133,59 +2595,32 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64_unmasked(<vscale x 16 x i64> %va,
declare <vscale x 1 x i9> @llvm.vp.ctpop.nxv1i9(<vscale x 1 x i9>, <vscale x 1 x i1>, i32)
define <vscale x 1 x i9> @vp_ctpop_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_nxv1i9:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 511
-; RV32-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; RV32-NEXT: vand.vx v8, v8, a1
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_nxv1i9:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 511
-; RV64-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; RV64-NEXT: vand.vx v8, v8, a1
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_nxv1i9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 511
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i9:
; CHECK-ZVBB: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
index 3fcbfa8b142a65e..fb4735240904682 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -403,61 +403,33 @@ define <vscale x 64 x i8> @cttz_nxv64i8(<vscale x 64 x i8> %va) {
declare <vscale x 64 x i8> @llvm.cttz.nxv64i8(<vscale x 64 x i8>, i1)
define <vscale x 1 x i16> @cttz_nxv1i16(<vscale x 1 x i16> %va) {
-; RV32I-LABEL: cttz_nxv1i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; RV32I-NEXT: vsub.vx v9, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_nxv1i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; RV64I-NEXT: vsub.vx v9, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_nxv1i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_nxv1i16:
; CHECK-F: # %bb.0:
@@ -498,61 +470,33 @@ define <vscale x 1 x i16> @cttz_nxv1i16(<vscale x 1 x i16> %va) {
declare <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16>, i1)
define <vscale x 2 x i16> @cttz_nxv2i16(<vscale x 2 x i16> %va) {
-; RV32I-LABEL: cttz_nxv2i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; RV32I-NEXT: vsub.vx v9, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_nxv2i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; RV64I-NEXT: vsub.vx v9, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_nxv2i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_nxv2i16:
; CHECK-F: # %bb.0:
@@ -593,61 +537,33 @@ define <vscale x 2 x i16> @cttz_nxv2i16(<vscale x 2 x i16> %va) {
declare <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16>, i1)
define <vscale x 4 x i16> @cttz_nxv4i16(<vscale x 4 x i16> %va) {
-; RV32I-LABEL: cttz_nxv4i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; RV32I-NEXT: vsub.vx v9, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_nxv4i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; RV64I-NEXT: vsub.vx v9, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_nxv4i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_nxv4i16:
; CHECK-F: # %bb.0:
@@ -688,61 +604,33 @@ define <vscale x 4 x i16> @cttz_nxv4i16(<vscale x 4 x i16> %va) {
declare <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16>, i1)
define <vscale x 8 x i16> @cttz_nxv8i16(<vscale x 8 x i16> %va) {
-; RV32I-LABEL: cttz_nxv8i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV32I-NEXT: vsub.vx v10, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v10, v10, a0
-; RV32I-NEXT: vsub.vv v8, v8, v10
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v10, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v10, v8
-; RV32I-NEXT: vsrl.vi v10, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v10
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_nxv8i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV64I-NEXT: vsub.vx v10, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v10, v10, a0
-; RV64I-NEXT: vsub.vv v8, v8, v10
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v10, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v10, v8
-; RV64I-NEXT: vsrl.vi v10, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v10
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_nxv8i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_nxv8i16:
; CHECK-F: # %bb.0:
@@ -783,61 +671,33 @@ define <vscale x 8 x i16> @cttz_nxv8i16(<vscale x 8 x i16> %va) {
declare <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16>, i1)
define <vscale x 16 x i16> @cttz_nxv16i16(<vscale x 16 x i16> %va) {
-; RV32I-LABEL: cttz_nxv16i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; RV32I-NEXT: vsub.vx v12, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v12
-; RV32I-NEXT: vsrl.vi v12, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v12, v12, a0
-; RV32I-NEXT: vsub.vv v8, v8, v12
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v12, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v12, v8
-; RV32I-NEXT: vsrl.vi v12, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v12
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_nxv16i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; RV64I-NEXT: vsub.vx v12, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v12
-; RV64I-NEXT: vsrl.vi v12, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v12, v12, a0
-; RV64I-NEXT: vsub.vv v8, v8, v12
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v12, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v12, v8
-; RV64I-NEXT: vsrl.vi v12, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v12
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_nxv16i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v12, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_nxv16i16:
; CHECK-F: # %bb.0:
@@ -878,130 +738,73 @@ define <vscale x 16 x i16> @cttz_nxv16i16(<vscale x 16 x i16> %va) {
declare <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16>, i1)
define <vscale x 32 x i16> @cttz_nxv32i16(<vscale x 32 x i16> %va) {
-; RV32-LABEL: cttz_nxv32i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, 1
-; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a0
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: cttz_nxv32i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a0
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: cttz_nxv32i16:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-ZVBB-NEXT: vctz.v v8, v8
-; CHECK-ZVBB-NEXT: ret
- %a = call <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 false)
- ret <vscale x 32 x i16> %a
+; CHECK-LABEL: cttz_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vsub.vx v16, v8, a0
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: cttz_nxv32i16:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
+ %a = call <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 false)
+ ret <vscale x 32 x i16> %a
}
declare <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16>, i1)
define <vscale x 1 x i32> @cttz_nxv1i32(<vscale x 1 x i32> %va) {
-; RV32I-LABEL: cttz_nxv1i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV32I-NEXT: vsub.vx v9, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_nxv1i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64I-NEXT: vsub.vx v9, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_nxv1i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_nxv1i32:
; CHECK-F: # %bb.0:
@@ -1045,63 +848,34 @@ define <vscale x 1 x i32> @cttz_nxv1i32(<vscale x 1 x i32> %va) {
declare <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32>, i1)
define <vscale x 2 x i32> @cttz_nxv2i32(<vscale x 2 x i32> %va) {
-; RV32I-LABEL: cttz_nxv2i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV32I-NEXT: vsub.vx v9, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_nxv2i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64I-NEXT: vsub.vx v9, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_nxv2i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_nxv2i32:
; CHECK-F: # %bb.0:
@@ -1145,63 +919,34 @@ define <vscale x 2 x i32> @cttz_nxv2i32(<vscale x 2 x i32> %va) {
declare <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32>, i1)
define <vscale x 4 x i32> @cttz_nxv4i32(<vscale x 4 x i32> %va) {
-; RV32I-LABEL: cttz_nxv4i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32I-NEXT: vsub.vx v10, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v10, v10, a0
-; RV32I-NEXT: vsub.vv v8, v8, v10
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v10, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v10, v8
-; RV32I-NEXT: vsrl.vi v10, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v10
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_nxv4i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64I-NEXT: vsub.vx v10, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v10, v10, a0
-; RV64I-NEXT: vsub.vv v8, v8, v10
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v10, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v10, v8
-; RV64I-NEXT: vsrl.vi v10, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v10
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_nxv4i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_nxv4i32:
; CHECK-F: # %bb.0:
@@ -1245,63 +990,34 @@ define <vscale x 4 x i32> @cttz_nxv4i32(<vscale x 4 x i32> %va) {
declare <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32>, i1)
define <vscale x 8 x i32> @cttz_nxv8i32(<vscale x 8 x i32> %va) {
-; RV32I-LABEL: cttz_nxv8i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32I-NEXT: vsub.vx v12, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v12
-; RV32I-NEXT: vsrl.vi v12, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v12, v12, a0
-; RV32I-NEXT: vsub.vv v8, v8, v12
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v12, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v12, v8
-; RV32I-NEXT: vsrl.vi v12, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v12
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_nxv8i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64I-NEXT: vsub.vx v12, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v12
-; RV64I-NEXT: vsrl.vi v12, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v12, v12, a0
-; RV64I-NEXT: vsub.vv v8, v8, v12
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v12, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v12, v8
-; RV64I-NEXT: vsrl.vi v12, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v12
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_nxv8i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v12, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_nxv8i32:
; CHECK-F: # %bb.0:
@@ -1345,63 +1061,34 @@ define <vscale x 8 x i32> @cttz_nxv8i32(<vscale x 8 x i32> %va) {
declare <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32>, i1)
define <vscale x 16 x i32> @cttz_nxv16i32(<vscale x 16 x i32> %va) {
-; RV32I-LABEL: cttz_nxv16i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32I-NEXT: vsub.vx v16, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v16
-; RV32I-NEXT: vsrl.vi v16, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v16, v16, a0
-; RV32I-NEXT: vsub.vv v8, v8, v16
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v16, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v16, v8
-; RV32I-NEXT: vsrl.vi v16, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v16
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_nxv16i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV64I-NEXT: vsub.vx v16, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v16
-; RV64I-NEXT: vsrl.vi v16, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v16, v16, a0
-; RV64I-NEXT: vsub.vv v8, v8, v16
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v16, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v16, v8
-; RV64I-NEXT: vsrl.vi v16, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v16
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_nxv16i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v16, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v16, v16, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v16, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v16, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_nxv16i32:
; CHECK-F: # %bb.0:
@@ -2465,61 +2152,33 @@ define <vscale x 64 x i8> @cttz_zero_undef_nxv64i8(<vscale x 64 x i8> %va) {
}
define <vscale x 1 x i16> @cttz_zero_undef_nxv1i16(<vscale x 1 x i16> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv1i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; RV32I-NEXT: vsub.vx v9, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv1i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; RV64I-NEXT: vsub.vx v9, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv1i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_zero_undef_nxv1i16:
; CHECK-F: # %bb.0:
@@ -2527,87 +2186,59 @@ define <vscale x 1 x i16> @cttz_zero_undef_nxv1i16(<vscale x 1 x i16> %va) {
; CHECK-F-NEXT: vrsub.vi v9, v8, 0
; CHECK-F-NEXT: vand.vv v8, v8, v9
; CHECK-F-NEXT: vfwcvt.f.xu.v v9, v8
-; CHECK-F-NEXT: vnsrl.wi v8, v9, 23
-; CHECK-F-NEXT: li a0, 127
-; CHECK-F-NEXT: vsub.vx v8, v8, a0
-; CHECK-F-NEXT: ret
-;
-; CHECK-D-LABEL: cttz_zero_undef_nxv1i16:
-; CHECK-D: # %bb.0:
-; CHECK-D-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-D-NEXT: vrsub.vi v9, v8, 0
-; CHECK-D-NEXT: vand.vv v8, v8, v9
-; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8
-; CHECK-D-NEXT: vnsrl.wi v8, v9, 23
-; CHECK-D-NEXT: li a0, 127
-; CHECK-D-NEXT: vsub.vx v8, v8, a0
-; CHECK-D-NEXT: ret
-;
-; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv1i16:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-ZVBB-NEXT: vctz.v v8, v8
-; CHECK-ZVBB-NEXT: ret
- %a = call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 true)
- ret <vscale x 1 x i16> %a
-}
-
-define <vscale x 2 x i16> @cttz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv2i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; RV32I-NEXT: vsub.vx v9, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv2i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; RV64I-NEXT: vsub.vx v9, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-F-NEXT: vnsrl.wi v8, v9, 23
+; CHECK-F-NEXT: li a0, 127
+; CHECK-F-NEXT: vsub.vx v8, v8, a0
+; CHECK-F-NEXT: ret
+;
+; CHECK-D-LABEL: cttz_zero_undef_nxv1i16:
+; CHECK-D: # %bb.0:
+; CHECK-D-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-D-NEXT: vrsub.vi v9, v8, 0
+; CHECK-D-NEXT: vand.vv v8, v8, v9
+; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT: vnsrl.wi v8, v9, 23
+; CHECK-D-NEXT: li a0, 127
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
+; CHECK-D-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv1i16:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
+ %a = call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 true)
+ ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 2 x i16> @cttz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) {
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv2i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_zero_undef_nxv2i16:
; CHECK-F: # %bb.0:
@@ -2641,61 +2272,33 @@ define <vscale x 2 x i16> @cttz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) {
}
define <vscale x 4 x i16> @cttz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv4i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; RV32I-NEXT: vsub.vx v9, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv4i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; RV64I-NEXT: vsub.vx v9, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv4i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_zero_undef_nxv4i16:
; CHECK-F: # %bb.0:
@@ -2729,61 +2332,33 @@ define <vscale x 4 x i16> @cttz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) {
}
define <vscale x 8 x i16> @cttz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv8i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV32I-NEXT: vsub.vx v10, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v10, v10, a0
-; RV32I-NEXT: vsub.vv v8, v8, v10
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v10, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v10, v8
-; RV32I-NEXT: vsrl.vi v10, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v10
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv8i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV64I-NEXT: vsub.vx v10, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v10, v10, a0
-; RV64I-NEXT: vsub.vv v8, v8, v10
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v10, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v10, v8
-; RV64I-NEXT: vsrl.vi v10, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v10
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv8i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_zero_undef_nxv8i16:
; CHECK-F: # %bb.0:
@@ -2817,61 +2392,33 @@ define <vscale x 8 x i16> @cttz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) {
}
define <vscale x 16 x i16> @cttz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv16i16:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; RV32I-NEXT: vsub.vx v12, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v12
-; RV32I-NEXT: vsrl.vi v12, v8, 1
-; RV32I-NEXT: lui a0, 5
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v12, v12, a0
-; RV32I-NEXT: vsub.vv v8, v8, v12
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v12, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v12, v8
-; RV32I-NEXT: vsrl.vi v12, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v12
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: li a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 8
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv16i16:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; RV64I-NEXT: vsub.vx v12, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v12
-; RV64I-NEXT: vsrl.vi v12, v8, 1
-; RV64I-NEXT: lui a0, 5
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v12, v12, a0
-; RV64I-NEXT: vsub.vv v8, v8, v12
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v12, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v12, v8
-; RV64I-NEXT: vsrl.vi v12, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v12
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: li a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 8
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv16i16:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: lui a0, 3
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v12, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: lui a0, 1
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: li a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_zero_undef_nxv16i16:
; CHECK-F: # %bb.0:
@@ -2905,129 +2452,72 @@ define <vscale x 16 x i16> @cttz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) {
}
define <vscale x 32 x i16> @cttz_zero_undef_nxv32i16(<vscale x 32 x i16> %va) {
-; RV32-LABEL: cttz_zero_undef_nxv32i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, 1
-; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a0
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: cttz_zero_undef_nxv32i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a0
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: cttz_zero_undef_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vsub.vx v16, v8, a0
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv32i16:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-ZVBB-NEXT: vctz.v v8, v8
-; CHECK-ZVBB-NEXT: ret
- %a = call <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 true)
- ret <vscale x 32 x i16> %a
-}
-
-define <vscale x 1 x i32> @cttz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv1i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV32I-NEXT: vsub.vx v9, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv1i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64I-NEXT: vsub.vx v9, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
+ %a = call <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 true)
+ ret <vscale x 32 x i16> %a
+}
+
+define <vscale x 1 x i32> @cttz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv1i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_zero_undef_nxv1i32:
; CHECK-F: # %bb.0:
@@ -3064,63 +2554,34 @@ define <vscale x 1 x i32> @cttz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
}
define <vscale x 2 x i32> @cttz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv2i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV32I-NEXT: vsub.vx v9, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: vsrl.vi v9, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v9, v9, a0
-; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v9, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: vsrl.vi v9, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v9
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv2i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64I-NEXT: vsub.vx v9, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v9
-; RV64I-NEXT: vsrl.vi v9, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v9, v9, a0
-; RV64I-NEXT: vsub.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v9, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v9, v8
-; RV64I-NEXT: vsrl.vi v9, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v9
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv2i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_zero_undef_nxv2i32:
; CHECK-F: # %bb.0:
@@ -3157,63 +2618,34 @@ define <vscale x 2 x i32> @cttz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
}
define <vscale x 4 x i32> @cttz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv4i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32I-NEXT: vsub.vx v10, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v10
-; RV32I-NEXT: vsrl.vi v10, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v10, v10, a0
-; RV32I-NEXT: vsub.vv v8, v8, v10
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v10, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v10, v8
-; RV32I-NEXT: vsrl.vi v10, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v10
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv4i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64I-NEXT: vsub.vx v10, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v10
-; RV64I-NEXT: vsrl.vi v10, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v10, v10, a0
-; RV64I-NEXT: vsub.vv v8, v8, v10
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v10, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v10, v8
-; RV64I-NEXT: vsrl.vi v10, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v10
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv4i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_zero_undef_nxv4i32:
; CHECK-F: # %bb.0:
@@ -3250,63 +2682,34 @@ define <vscale x 4 x i32> @cttz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
}
define <vscale x 8 x i32> @cttz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv8i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32I-NEXT: vsub.vx v12, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v12
-; RV32I-NEXT: vsrl.vi v12, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v12, v12, a0
-; RV32I-NEXT: vsub.vv v8, v8, v12
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v12, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v12, v8
-; RV32I-NEXT: vsrl.vi v12, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v12
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv8i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64I-NEXT: vsub.vx v12, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v12
-; RV64I-NEXT: vsrl.vi v12, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v12, v12, a0
-; RV64I-NEXT: vsub.vv v8, v8, v12
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v12, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v12, v8
-; RV64I-NEXT: vsrl.vi v12, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v12
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv8i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v12, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v12, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v12
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_zero_undef_nxv8i32:
; CHECK-F: # %bb.0:
@@ -3343,63 +2746,34 @@ define <vscale x 8 x i32> @cttz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
}
define <vscale x 16 x i32> @cttz_zero_undef_nxv16i32(<vscale x 16 x i32> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv16i32:
-; RV32I: # %bb.0:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32I-NEXT: vsub.vx v16, v8, a0
-; RV32I-NEXT: vnot.v v8, v8
-; RV32I-NEXT: vand.vv v8, v8, v16
-; RV32I-NEXT: vsrl.vi v16, v8, 1
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: vand.vx v16, v16, a0
-; RV32I-NEXT: vsub.vv v8, v8, v16
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: vand.vx v16, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: vadd.vv v8, v16, v8
-; RV32I-NEXT: vsrl.vi v16, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v16
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: vand.vx v8, v8, a0
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: vmul.vx v8, v8, a0
-; RV32I-NEXT: vsrl.vi v8, v8, 24
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv16i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV64I-NEXT: vsub.vx v16, v8, a0
-; RV64I-NEXT: vnot.v v8, v8
-; RV64I-NEXT: vand.vv v8, v8, v16
-; RV64I-NEXT: vsrl.vi v16, v8, 1
-; RV64I-NEXT: lui a0, 349525
-; RV64I-NEXT: addiw a0, a0, 1365
-; RV64I-NEXT: vand.vx v16, v16, a0
-; RV64I-NEXT: vsub.vv v8, v8, v16
-; RV64I-NEXT: lui a0, 209715
-; RV64I-NEXT: addiw a0, a0, 819
-; RV64I-NEXT: vand.vx v16, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 2
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: vadd.vv v8, v16, v8
-; RV64I-NEXT: vsrl.vi v16, v8, 4
-; RV64I-NEXT: vadd.vv v8, v8, v16
-; RV64I-NEXT: lui a0, 61681
-; RV64I-NEXT: addiw a0, a0, -241
-; RV64I-NEXT: vand.vx v8, v8, a0
-; RV64I-NEXT: lui a0, 4112
-; RV64I-NEXT: addiw a0, a0, 257
-; RV64I-NEXT: vmul.vx v8, v8, a0
-; RV64I-NEXT: vsrl.vi v8, v8, 24
-; RV64I-NEXT: ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv16i32:
+; CHECK-ZVE64X: # %bb.0:
+; CHECK-ZVE64X-NEXT: li a0, 1
+; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-ZVE64X-NEXT: vsub.vx v16, v8, a0
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 1
+; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
+; CHECK-ZVE64X-NEXT: vand.vx v16, v16, a0
+; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: lui a0, 209715
+; CHECK-ZVE64X-NEXT: addi a0, a0, 819
+; CHECK-ZVE64X-NEXT: vand.vx v16, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v16, v8
+; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 4
+; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v16
+; CHECK-ZVE64X-NEXT: lui a0, 61681
+; CHECK-ZVE64X-NEXT: addi a0, a0, -241
+; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: lui a0, 4112
+; CHECK-ZVE64X-NEXT: addi a0, a0, 257
+; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24
+; CHECK-ZVE64X-NEXT: ret
;
; CHECK-F-LABEL: cttz_zero_undef_nxv16i32:
; CHECK-F: # %bb.0:
@@ -3915,3 +3289,6 @@ define <vscale x 8 x i64> @cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
%a = call <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true)
ret <vscale x 8 x i64> %a
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
index 1794b0cce7f86a0..1dda8aa458d9069 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
@@ -473,61 +473,33 @@ define <vscale x 64 x i8> @vp_cttz_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32
declare <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
define <vscale x 1 x i16> @vp_cttz_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv1i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv1i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv1i16:
; CHECK-ZVBB: # %bb.0:
@@ -539,61 +511,33 @@ define <vscale x 1 x i16> @vp_cttz_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x
}
define <vscale x 1 x i16> @vp_cttz_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv1i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv1i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv1i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -609,61 +553,33 @@ define <vscale x 1 x i16> @vp_cttz_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32
declare <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
define <vscale x 2 x i16> @vp_cttz_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv2i16:
; CHECK-ZVBB: # %bb.0:
@@ -675,61 +591,33 @@ define <vscale x 2 x i16> @vp_cttz_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x
}
define <vscale x 2 x i16> @vp_cttz_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv2i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv2i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv2i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -745,61 +633,33 @@ define <vscale x 2 x i16> @vp_cttz_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32
declare <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
define <vscale x 4 x i16> @vp_cttz_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv4i16:
; CHECK-ZVBB: # %bb.0:
@@ -811,61 +671,33 @@ define <vscale x 4 x i16> @vp_cttz_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x
}
define <vscale x 4 x i16> @vp_cttz_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv4i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv4i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv4i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -881,61 +713,33 @@ define <vscale x 4 x i16> @vp_cttz_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32
declare <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
define <vscale x 8 x i16> @vp_cttz_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv8i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv8i16:
; CHECK-ZVBB: # %bb.0:
@@ -947,61 +751,33 @@ define <vscale x 8 x i16> @vp_cttz_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x
}
define <vscale x 8 x i16> @vp_cttz_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv8i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv8i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsub.vx v10, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv8i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1017,61 +793,33 @@ define <vscale x 8 x i16> @vp_cttz_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32
declare <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
define <vscale x 16 x i16> @vp_cttz_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv16i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV32-NEXT: vsub.vx v12, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv16i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV64-NEXT: vsub.vx v12, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv16i16:
; CHECK-ZVBB: # %bb.0:
@@ -1083,61 +831,33 @@ define <vscale x 16 x i16> @vp_cttz_nxv16i16(<vscale x 16 x i16> %va, <vscale x
}
define <vscale x 16 x i16> @vp_cttz_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv16i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV32-NEXT: vsub.vx v12, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv16i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; RV64-NEXT: vsub.vx v12, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vsub.vx v12, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v12
+; CHECK-NEXT: vsrl.vi v12, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vsub.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv16i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1153,61 +873,33 @@ define <vscale x 16 x i16> @vp_cttz_nxv16i16_unmasked(<vscale x 16 x i16> %va, i
declare <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
define <vscale x 32 x i16> @vp_cttz_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv32i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv32i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv32i16:
; CHECK-ZVBB: # %bb.0:
@@ -1219,61 +911,33 @@ define <vscale x 32 x i16> @vp_cttz_nxv32i16(<vscale x 32 x i16> %va, <vscale x
}
define <vscale x 32 x i16> @vp_cttz_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv32i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv32i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsub.vx v16, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv32i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1289,63 +953,34 @@ define <vscale x 32 x i16> @vp_cttz_nxv32i16_unmasked(<vscale x 32 x i16> %va, i
declare <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
define <vscale x 1 x i32> @vp_cttz_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv1i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv1i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv1i32:
; CHECK-ZVBB: # %bb.0:
@@ -1357,63 +992,34 @@ define <vscale x 1 x i32> @vp_cttz_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x
}
define <vscale x 1 x i32> @vp_cttz_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv1i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv1i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv1i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1429,63 +1035,34 @@ define <vscale x 1 x i32> @vp_cttz_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32
declare <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
define <vscale x 2 x i32> @vp_cttz_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv2i32:
; CHECK-ZVBB: # %bb.0:
@@ -1497,63 +1074,34 @@ define <vscale x 2 x i32> @vp_cttz_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x
}
define <vscale x 2 x i32> @vp_cttz_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv2i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv2i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv2i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1569,63 +1117,34 @@ define <vscale x 2 x i32> @vp_cttz_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32
declare <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
define <vscale x 4 x i32> @vp_cttz_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv4i32:
; CHECK-ZVBB: # %bb.0:
@@ -1637,63 +1156,34 @@ define <vscale x 4 x i32> @vp_cttz_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x
}
define <vscale x 4 x i32> @vp_cttz_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv4i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv4i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsub.vx v10, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv4i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1709,63 +1199,34 @@ define <vscale x 4 x i32> @vp_cttz_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32
declare <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
define <vscale x 8 x i32> @vp_cttz_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsub.vx v12, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsub.vx v12, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv8i32:
; CHECK-ZVBB: # %bb.0:
@@ -1777,63 +1238,34 @@ define <vscale x 8 x i32> @vp_cttz_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x
}
define <vscale x 8 x i32> @vp_cttz_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv8i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsub.vx v12, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv8i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsub.vx v12, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsub.vx v12, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v12
+; CHECK-NEXT: vsrl.vi v12, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vsub.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv8i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -1849,63 +1281,34 @@ define <vscale x 8 x i32> @vp_cttz_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32
declare <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
define <vscale x 16 x i32> @vp_cttz_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv16i32:
; CHECK-ZVBB: # %bb.0:
@@ -1917,63 +1320,34 @@ define <vscale x 16 x i32> @vp_cttz_nxv16i32(<vscale x 16 x i32> %va, <vscale x
}
define <vscale x 16 x i32> @vp_cttz_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv16i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv16i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsub.vx v16, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv16i32_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -4138,61 +3512,33 @@ define <vscale x 16 x i16> @vp_cttz_zero_undef_nxv16i16_unmasked(<vscale x 16 x
define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv32i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv32i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16:
; CHECK-ZVBB: # %bb.0:
@@ -4204,61 +3550,33 @@ define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16(<vscale x 32 x i16> %va,
}
define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsub.vx v16, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v16
+; CHECK-NEXT: vsrl.vi v16, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v16, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v16, v8
+; CHECK-NEXT: vsrl.vi v16, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked:
; CHECK-ZVBB: # %bb.0:
@@ -4918,65 +4236,35 @@ define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64_unmasked(<vscale x 16 x
; Test promotion.
declare <vscale x 1 x i9> @llvm.vp.cttz.nxv1i9(<vscale x 1 x i9>, i1 immarg, <vscale x 1 x i1>, i32)
define <vscale x 1 x i9> @vp_cttz_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_nxv1i9:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 512
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vor.vx v8, v8, a1, v0.t
-; RV32-NEXT: li a0, 1
-; RV32-NEXT: vsub.vx v9, v8, a0, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_nxv1i9:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 512
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vor.vx v8, v8, a1, v0.t
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: vsub.vx v9, v8, a0, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_nxv1i9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 512
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vor.vx v8, v8, a1, v0.t
+; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: vsub.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv1i9:
; CHECK-ZVBB: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
index f5e5b9e9083b8dc..34dcce3fe058bc9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
@@ -769,7 +769,7 @@ define i32 @extractelt_sdiv_nxv4i32_splat(<vscale x 4 x i32> %x) {
; RV64NOM-LABEL: extractelt_sdiv_nxv4i32_splat:
; RV64NOM: # %bb.0:
; RV64NOM-NEXT: lui a0, 349525
-; RV64NOM-NEXT: addiw a0, a0, 1366
+; RV64NOM-NEXT: addi a0, a0, 1366
; RV64NOM-NEXT: vsetvli a1, zero, e32, m2, ta, ma
; RV64NOM-NEXT: vmulh.vx v8, v8, a0
; RV64NOM-NEXT: vsrl.vi v10, v8, 31
@@ -799,7 +799,7 @@ define i32 @extractelt_udiv_nxv4i32_splat(<vscale x 4 x i32> %x) {
; RV64NOM-LABEL: extractelt_udiv_nxv4i32_splat:
; RV64NOM: # %bb.0:
; RV64NOM-NEXT: lui a0, 349525
-; RV64NOM-NEXT: addiw a0, a0, 1366
+; RV64NOM-NEXT: addi a0, a0, 1366
; RV64NOM-NEXT: vsetvli a1, zero, e32, m2, ta, ma
; RV64NOM-NEXT: vmulh.vx v8, v8, a0
; RV64NOM-NEXT: vsrl.vi v10, v8, 31
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
index cf8dfea197afae7..22f92fe48e22e71 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
@@ -231,125 +231,67 @@ define <16 x i8> @vp_bitreverse_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl)
declare <2 x i16> @llvm.vp.bitreverse.v2i16(<2 x i16>, <2 x i1>, i32)
define <2 x i16> @vp_bitreverse_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_v2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: ret
%v = call <2 x i16> @llvm.vp.bitreverse.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl)
ret <2 x i16> %v
}
define <2 x i16> @vp_bitreverse_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v2i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vsll.vi v8, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_v2i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vsll.vi v8, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: vsll.vi v8, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x i16> @llvm.vp.bitreverse.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl)
@@ -359,125 +301,67 @@ define <2 x i16> @vp_bitreverse_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl)
declare <4 x i16> @llvm.vp.bitreverse.v4i16(<4 x i16>, <4 x i1>, i32)
define <4 x i16> @vp_bitreverse_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_v4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: ret
%v = call <4 x i16> @llvm.vp.bitreverse.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
ret <4 x i16> %v
}
define <4 x i16> @vp_bitreverse_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v4i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vsll.vi v8, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_v4i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vsll.vi v8, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: vsll.vi v8, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x i16> @llvm.vp.bitreverse.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
@@ -487,125 +371,67 @@ define <4 x i16> @vp_bitreverse_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl)
declare <8 x i16> @llvm.vp.bitreverse.v8i16(<8 x i16>, <8 x i1>, i32)
define <8 x i16> @vp_bitreverse_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v8i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_v8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: ret
%v = call <8 x i16> @llvm.vp.bitreverse.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl)
ret <8 x i16> %v
}
define <8 x i16> @vp_bitreverse_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v8i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vsll.vi v8, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_v8i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vsll.vi v8, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: vsll.vi v8, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x i16> @llvm.vp.bitreverse.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl)
@@ -615,125 +441,67 @@ define <8 x i16> @vp_bitreverse_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl)
declare <16 x i16> @llvm.vp.bitreverse.v16i16(<16 x i16>, <16 x i1>, i32)
define <16 x i16> @vp_bitreverse_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v16i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v10, v8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_v16i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v10, v8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT: ret
%v = call <16 x i16> @llvm.vp.bitreverse.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl)
ret <16 x i16> %v
}
define <16 x i16> @vp_bitreverse_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v16i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: vsll.vi v8, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 2
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_v16i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: vsll.vi v8, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 2
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v10, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 8
+; CHECK-NEXT: vsll.vi v8, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 2
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v10, v8
+; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x i16> @llvm.vp.bitreverse.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl)
@@ -743,157 +511,83 @@ define <16 x i16> @vp_bitreverse_v16i16_unmasked(<16 x i16> %va, i32 zeroext %ev
declare <2 x i32> @llvm.vp.bitreverse.v2i32(<2 x i32>, <2 x i1>, i32)
define <2 x i32> @vp_bitreverse_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t
-; RV32-NEXT: vor.vv v9, v9, v10, v0.t
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v10, v10, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_v2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 24, v0.t
-; RV64-NEXT: vor.vv v9, v9, v10, v0.t
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v10, v10, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: ret
%v = call <2 x i32> @llvm.vp.bitreverse.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
ret <2 x i32> %v
}
define <2 x i32> @vp_bitreverse_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v2i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsrl.vi v10, v8, 24
-; RV32-NEXT: vor.vv v9, v9, v10
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsll.vi v10, v10, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_v2i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsrl.vi v10, v8, 24
-; RV64-NEXT: vor.vv v9, v9, v10
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsll.vi v10, v10, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsrl.vi v10, v8, 24
+; CHECK-NEXT: vor.vv v9, v9, v10
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsll.vi v10, v10, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x i32> @llvm.vp.bitreverse.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
@@ -903,317 +597,169 @@ define <2 x i32> @vp_bitreverse_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl)
declare <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32>, <4 x i1>, i32)
define <4 x i32> @vp_bitreverse_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t
-; RV32-NEXT: vor.vv v9, v9, v10, v0.t
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v10, v10, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v9, v8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_v4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 24, v0.t
-; RV64-NEXT: vor.vv v9, v9, v10, v0.t
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v10, v10, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v9, v8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT: ret
%v = call <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
ret <4 x i32> %v
}
define <4 x i32> @vp_bitreverse_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v4i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsrl.vi v10, v8, 24
-; RV32-NEXT: vor.vv v9, v9, v10
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsll.vi v10, v10, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v9, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_v4i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsrl.vi v10, v8, 24
-; RV64-NEXT: vor.vv v9, v9, v10
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsll.vi v10, v10, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
- ret <4 x i32> %v
-}
-
-declare <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32>, <8 x i1>, i32)
-
-define <8 x i32> @vp_bitreverse_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t
-; RV32-NEXT: vor.vv v10, v10, v12, v0.t
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v12, v12, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v10, v8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_v8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 24, v0.t
-; RV64-NEXT: vor.vv v10, v10, v12, v0.t
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v12, v12, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v10, v8, v0.t
-; RV64-NEXT: ret
- %v = call <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
- ret <8 x i32> %v
-}
-
-define <8 x i32> @vp_bitreverse_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v8i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsrl.vi v12, v8, 24
-; RV32-NEXT: vor.vv v10, v10, v12
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsll.vi v12, v12, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 2
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_v8i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsrl.vi v12, v8, 24
-; RV64-NEXT: vor.vv v10, v10, v12
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsll.vi v12, v12, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 2
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v10, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsrl.vi v10, v8, 24
+; CHECK-NEXT: vor.vv v9, v9, v10
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsll.vi v10, v10, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+declare <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32>, <8 x i1>, i32)
+
+define <8 x i32> @vp_bitreverse_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_bitreverse_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v10, v10, v12, v0.t
+; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v12, v12, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v10, v8, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vp_bitreverse_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_bitreverse_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsrl.vi v12, v8, 24
+; CHECK-NEXT: vor.vv v10, v10, v12
+; CHECK-NEXT: vand.vx v12, v8, a0
+; CHECK-NEXT: vsll.vi v12, v12, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 2
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v10, v8
+; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
@@ -1223,157 +769,83 @@ define <8 x i32> @vp_bitreverse_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl)
declare <16 x i32> @llvm.vp.bitreverse.v16i32(<16 x i32>, <16 x i1>, i32)
define <16 x i32> @vp_bitreverse_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t
-; RV32-NEXT: vor.vv v12, v12, v16, v0.t
-; RV32-NEXT: vand.vx v16, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v12, v8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_v16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t
-; RV64-NEXT: vor.vv v12, v12, v16, v0.t
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v16, v16, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v12, v8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v12, v12, v16, v0.t
+; CHECK-NEXT: vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v12, v8, v0.t
+; CHECK-NEXT: ret
%v = call <16 x i32> @llvm.vp.bitreverse.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
ret <16 x i32> %v
}
define <16 x i32> @vp_bitreverse_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v16i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsrl.vi v16, v8, 24
-; RV32-NEXT: vor.vv v12, v12, v16
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsll.vi v16, v16, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 2
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v12, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_v16i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsrl.vi v16, v8, 24
-; RV64-NEXT: vor.vv v12, v12, v16
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsll.vi v16, v16, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 2
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v12, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vsrl.vi v16, v8, 24
+; CHECK-NEXT: vor.vv v12, v12, v16
+; CHECK-NEXT: vand.vx v16, v8, a0
+; CHECK-NEXT: vsll.vi v16, v16, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vsrl.vi v12, v8, 4
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 2
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v12, v8
+; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x i32> @llvm.vp.bitreverse.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
@@ -2945,295 +2417,152 @@ define <16 x i64> @vp_bitreverse_v16i64_unmasked(<16 x i64> %va, i32 zeroext %ev
declare <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16>, <128 x i1>, i32)
define <128 x i16> @vp_bitreverse_v128i16(<128 x i16> %va, <128 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v128i16:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 4
-; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; RV32-NEXT: li a2, 64
-; RV32-NEXT: vslidedown.vi v24, v0, 8
-; RV32-NEXT: mv a1, a0
-; RV32-NEXT: bltu a0, a2, .LBB34_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a1, 64
-; RV32-NEXT: .LBB34_2:
-; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: lui a1, 1
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: vand.vx v16, v16, a1, v0.t
-; RV32-NEXT: vand.vx v8, v8, a1, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: lui a2, 3
-; RV32-NEXT: addi a2, a2, 819
-; RV32-NEXT: vand.vx v16, v16, a2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a2, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: lui a3, 5
-; RV32-NEXT: addi a3, a3, 1365
-; RV32-NEXT: vand.vx v16, v16, a3, v0.t
-; RV32-NEXT: vand.vx v8, v8, a3, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v16, v8, v0.t
-; RV32-NEXT: addi a4, sp, 16
-; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: addi a4, a0, -64
-; RV32-NEXT: sltu a0, a0, a4
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a0, a0, a4
-; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT: vmv1r.v v0, v24
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vand.vx v16, v16, a1, v0.t
-; RV32-NEXT: vand.vx v8, v8, a1, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vand.vx v16, v16, a2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a2, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vand.vx v16, v16, a3, v0.t
-; RV32-NEXT: vand.vx v8, v8, a3, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV32-NEXT: vor.vv v16, v16, v8, v0.t
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_v128i16:
-; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
-; RV64-NEXT: sub sp, sp, a1
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; RV64-NEXT: li a2, 64
-; RV64-NEXT: vslidedown.vi v24, v0, 8
-; RV64-NEXT: mv a1, a0
-; RV64-NEXT: bltu a0, a2, .LBB34_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: li a1, 64
-; RV64-NEXT: .LBB34_2:
-; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: lui a1, 1
-; RV64-NEXT: addiw a1, a1, -241
-; RV64-NEXT: vand.vx v16, v16, a1, v0.t
-; RV64-NEXT: vand.vx v8, v8, a1, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: lui a2, 3
-; RV64-NEXT: addiw a2, a2, 819
-; RV64-NEXT: vand.vx v16, v16, a2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a2, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a3, 5
-; RV64-NEXT: addiw a3, a3, 1365
-; RV64-NEXT: vand.vx v16, v16, a3, v0.t
-; RV64-NEXT: vand.vx v8, v8, a3, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: addi a4, sp, 16
-; RV64-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
-; RV64-NEXT: addi a4, a0, -64
-; RV64-NEXT: sltu a0, a0, a4
-; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: and a0, a0, a4
-; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT: vmv1r.v v0, v24
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 3
-; RV64-NEXT: add a0, sp, a0
-; RV64-NEXT: addi a0, a0, 16
-; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vand.vx v16, v16, a1, v0.t
-; RV64-NEXT: vand.vx v8, v8, a1, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vand.vx v16, v16, a2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a2, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vand.vx v16, v16, a3, v0.t
-; RV64-NEXT: vand.vx v8, v8, a3, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
-; RV64-NEXT: vor.vv v16, v16, v8, v0.t
-; RV64-NEXT: addi a0, sp, 16
-; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 4
-; RV64-NEXT: add sp, sp, a0
-; RV64-NEXT: addi sp, sp, 16
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_v128i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: vslidedown.vi v24, v0, 8
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB34_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 64
+; CHECK-NEXT: .LBB34_2:
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT: lui a1, 1
+; CHECK-NEXT: addi a1, a1, -241
+; CHECK-NEXT: vand.vx v16, v16, a1, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
+; CHECK-NEXT: lui a2, 3
+; CHECK-NEXT: addi a2, a2, 819
+; CHECK-NEXT: vand.vx v16, v16, a2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a2, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT: lui a3, 5
+; CHECK-NEXT: addi a3, a3, 1365
+; CHECK-NEXT: vand.vx v16, v16, a3, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a3, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT: addi a4, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a4, a0, -64
+; CHECK-NEXT: sltu a0, a0, a4
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a4
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
+; CHECK-NEXT: vand.vx v16, v16, a1, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v16, v16, a2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a2, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
+; CHECK-NEXT: vand.vx v16, v16, a3, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a3, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v16, v16, v8, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
%v = call <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16> %va, <128 x i1> %m, i32 %evl)
ret <128 x i16> %v
}
define <128 x i16> @vp_bitreverse_v128i16_unmasked(<128 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bitreverse_v128i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a2, 64
-; RV32-NEXT: mv a1, a0
-; RV32-NEXT: bltu a0, a2, .LBB35_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a1, 64
-; RV32-NEXT: .LBB35_2:
-; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; RV32-NEXT: vsrl.vi v24, v8, 8
-; RV32-NEXT: vsll.vi v8, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vi v24, v8, 4
-; RV32-NEXT: lui a1, 1
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: vand.vx v24, v24, a1
-; RV32-NEXT: vand.vx v8, v8, a1
-; RV32-NEXT: vsll.vi v8, v8, 4
-; RV32-NEXT: vor.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v24, v8, 2
-; RV32-NEXT: lui a2, 3
-; RV32-NEXT: addi a2, a2, 819
-; RV32-NEXT: vand.vx v24, v24, a2
-; RV32-NEXT: vand.vx v8, v8, a2
-; RV32-NEXT: vsll.vi v8, v8, 2
-; RV32-NEXT: vor.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v24, v8, 1
-; RV32-NEXT: lui a3, 5
-; RV32-NEXT: addi a3, a3, 1365
-; RV32-NEXT: vand.vx v24, v24, a3
-; RV32-NEXT: vand.vx v8, v8, a3
-; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vor.vv v8, v24, v8
-; RV32-NEXT: addi a4, a0, -64
-; RV32-NEXT: sltu a0, a0, a4
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a0, a0, a4
-; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV32-NEXT: vsrl.vi v24, v16, 8
-; RV32-NEXT: vsll.vi v16, v16, 8
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vsrl.vi v24, v16, 4
-; RV32-NEXT: vand.vx v24, v24, a1
-; RV32-NEXT: vand.vx v16, v16, a1
-; RV32-NEXT: vsll.vi v16, v16, 4
-; RV32-NEXT: vor.vv v16, v24, v16
-; RV32-NEXT: vsrl.vi v24, v16, 2
-; RV32-NEXT: vand.vx v24, v24, a2
-; RV32-NEXT: vand.vx v16, v16, a2
-; RV32-NEXT: vsll.vi v16, v16, 2
-; RV32-NEXT: vor.vv v16, v24, v16
-; RV32-NEXT: vsrl.vi v24, v16, 1
-; RV32-NEXT: vand.vx v24, v24, a3
-; RV32-NEXT: vand.vx v16, v16, a3
-; RV32-NEXT: vadd.vv v16, v16, v16
-; RV32-NEXT: vor.vv v16, v24, v16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bitreverse_v128i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a2, 64
-; RV64-NEXT: mv a1, a0
-; RV64-NEXT: bltu a0, a2, .LBB35_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: li a1, 64
-; RV64-NEXT: .LBB35_2:
-; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; RV64-NEXT: vsrl.vi v24, v8, 8
-; RV64-NEXT: vsll.vi v8, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 4
-; RV64-NEXT: lui a1, 1
-; RV64-NEXT: addiw a1, a1, -241
-; RV64-NEXT: vand.vx v24, v24, a1
-; RV64-NEXT: vand.vx v8, v8, a1
-; RV64-NEXT: vsll.vi v8, v8, 4
-; RV64-NEXT: vor.vv v8, v24, v8
-; RV64-NEXT: vsrl.vi v24, v8, 2
-; RV64-NEXT: lui a2, 3
-; RV64-NEXT: addiw a2, a2, 819
-; RV64-NEXT: vand.vx v24, v24, a2
-; RV64-NEXT: vand.vx v8, v8, a2
-; RV64-NEXT: vsll.vi v8, v8, 2
-; RV64-NEXT: vor.vv v8, v24, v8
-; RV64-NEXT: vsrl.vi v24, v8, 1
-; RV64-NEXT: lui a3, 5
-; RV64-NEXT: addiw a3, a3, 1365
-; RV64-NEXT: vand.vx v24, v24, a3
-; RV64-NEXT: vand.vx v8, v8, a3
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vor.vv v8, v24, v8
-; RV64-NEXT: addi a4, a0, -64
-; RV64-NEXT: sltu a0, a0, a4
-; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: and a0, a0, a4
-; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; RV64-NEXT: vsrl.vi v24, v16, 8
-; RV64-NEXT: vsll.vi v16, v16, 8
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 4
-; RV64-NEXT: vand.vx v24, v24, a1
-; RV64-NEXT: vand.vx v16, v16, a1
-; RV64-NEXT: vsll.vi v16, v16, 4
-; RV64-NEXT: vor.vv v16, v24, v16
-; RV64-NEXT: vsrl.vi v24, v16, 2
-; RV64-NEXT: vand.vx v24, v24, a2
-; RV64-NEXT: vand.vx v16, v16, a2
-; RV64-NEXT: vsll.vi v16, v16, 2
-; RV64-NEXT: vor.vv v16, v24, v16
-; RV64-NEXT: vsrl.vi v24, v16, 1
-; RV64-NEXT: vand.vx v24, v24, a3
-; RV64-NEXT: vand.vx v16, v16, a3
-; RV64-NEXT: vadd.vv v16, v16, v16
-; RV64-NEXT: vor.vv v16, v24, v16
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bitreverse_v128i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB35_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 64
+; CHECK-NEXT: .LBB35_2:
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v24, v8, 8
+; CHECK-NEXT: vsll.vi v8, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v24
+; CHECK-NEXT: vsrl.vi v24, v8, 4
+; CHECK-NEXT: lui a1, 1
+; CHECK-NEXT: addi a1, a1, -241
+; CHECK-NEXT: vand.vx v24, v24, a1
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vsll.vi v8, v8, 4
+; CHECK-NEXT: vor.vv v8, v24, v8
+; CHECK-NEXT: vsrl.vi v24, v8, 2
+; CHECK-NEXT: lui a2, 3
+; CHECK-NEXT: addi a2, a2, 819
+; CHECK-NEXT: vand.vx v24, v24, a2
+; CHECK-NEXT: vand.vx v8, v8, a2
+; CHECK-NEXT: vsll.vi v8, v8, 2
+; CHECK-NEXT: vor.vv v8, v24, v8
+; CHECK-NEXT: vsrl.vi v24, v8, 1
+; CHECK-NEXT: lui a3, 5
+; CHECK-NEXT: addi a3, a3, 1365
+; CHECK-NEXT: vand.vx v24, v24, a3
+; CHECK-NEXT: vand.vx v8, v8, a3
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vor.vv v8, v24, v8
+; CHECK-NEXT: addi a4, a0, -64
+; CHECK-NEXT: sltu a0, a0, a4
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a4
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v24, v16, 8
+; CHECK-NEXT: vsll.vi v16, v16, 8
+; CHECK-NEXT: vor.vv v16, v16, v24
+; CHECK-NEXT: vsrl.vi v24, v16, 4
+; CHECK-NEXT: vand.vx v24, v24, a1
+; CHECK-NEXT: vand.vx v16, v16, a1
+; CHECK-NEXT: vsll.vi v16, v16, 4
+; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: vsrl.vi v24, v16, 2
+; CHECK-NEXT: vand.vx v24, v24, a2
+; CHECK-NEXT: vand.vx v16, v16, a2
+; CHECK-NEXT: vsll.vi v16, v16, 2
+; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: vsrl.vi v24, v16, 1
+; CHECK-NEXT: vand.vx v24, v24, a3
+; CHECK-NEXT: vand.vx v16, v16, a3
+; CHECK-NEXT: vadd.vv v16, v16, v16
+; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: ret
%head = insertelement <128 x i1> poison, i1 true, i32 0
%m = shufflevector <128 x i1> %head, <128 x i1> poison, <128 x i32> zeroinitializer
%v = call <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16> %va, <128 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
index 06256d49f12b2ca..74a3153b5839dc6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
@@ -47,21 +47,21 @@ define void @bitreverse_v8i16(ptr %x, ptr %y) {
; RV64-NEXT: vor.vv v8, v8, v9
; RV64-NEXT: vsrl.vi v9, v8, 4
; RV64-NEXT: lui a1, 1
-; RV64-NEXT: addiw a1, a1, -241
+; RV64-NEXT: addi a1, a1, -241
; RV64-NEXT: vand.vx v9, v9, a1
; RV64-NEXT: vand.vx v8, v8, a1
; RV64-NEXT: vsll.vi v8, v8, 4
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: vsrl.vi v9, v8, 2
; RV64-NEXT: lui a1, 3
-; RV64-NEXT: addiw a1, a1, 819
+; RV64-NEXT: addi a1, a1, 819
; RV64-NEXT: vand.vx v9, v9, a1
; RV64-NEXT: vand.vx v8, v8, a1
; RV64-NEXT: vsll.vi v8, v8, 2
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: vsrl.vi v9, v8, 1
; RV64-NEXT: lui a1, 5
-; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: addi a1, a1, 1365
; RV64-NEXT: vand.vx v9, v9, a1
; RV64-NEXT: vand.vx v8, v8, a1
; RV64-NEXT: vadd.vv v8, v8, v8
@@ -130,7 +130,7 @@ define void @bitreverse_v4i32(ptr %x, ptr %y) {
; RV64-NEXT: vle32.v v8, (a0)
; RV64-NEXT: vsrl.vi v9, v8, 8
; RV64-NEXT: lui a1, 16
-; RV64-NEXT: addiw a1, a1, -256
+; RV64-NEXT: addi a1, a1, -256
; RV64-NEXT: vand.vx v9, v9, a1
; RV64-NEXT: vsrl.vi v10, v8, 24
; RV64-NEXT: vor.vv v9, v9, v10
@@ -141,21 +141,21 @@ define void @bitreverse_v4i32(ptr %x, ptr %y) {
; RV64-NEXT: vor.vv v8, v8, v9
; RV64-NEXT: vsrl.vi v9, v8, 4
; RV64-NEXT: lui a1, 61681
-; RV64-NEXT: addiw a1, a1, -241
+; RV64-NEXT: addi a1, a1, -241
; RV64-NEXT: vand.vx v9, v9, a1
; RV64-NEXT: vand.vx v8, v8, a1
; RV64-NEXT: vsll.vi v8, v8, 4
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: vsrl.vi v9, v8, 2
; RV64-NEXT: lui a1, 209715
-; RV64-NEXT: addiw a1, a1, 819
+; RV64-NEXT: addi a1, a1, 819
; RV64-NEXT: vand.vx v9, v9, a1
; RV64-NEXT: vand.vx v8, v8, a1
; RV64-NEXT: vsll.vi v8, v8, 2
; RV64-NEXT: vor.vv v8, v9, v8
; RV64-NEXT: vsrl.vi v9, v8, 1
; RV64-NEXT: lui a1, 349525
-; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: addi a1, a1, 1365
; RV64-NEXT: vand.vx v9, v9, a1
; RV64-NEXT: vand.vx v8, v8, a1
; RV64-NEXT: vadd.vv v8, v8, v8
@@ -368,21 +368,21 @@ define void @bitreverse_v16i16(ptr %x, ptr %y) {
; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10
; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4
; LMULMAX2-RV64-NEXT: lui a1, 1
-; LMULMAX2-RV64-NEXT: addiw a1, a1, -241
+; LMULMAX2-RV64-NEXT: addi a1, a1, -241
; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1
; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 4
; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8
; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 2
; LMULMAX2-RV64-NEXT: lui a1, 3
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 819
+; LMULMAX2-RV64-NEXT: addi a1, a1, 819
; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1
; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 2
; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8
; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1
; LMULMAX2-RV64-NEXT: lui a1, 5
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365
+; LMULMAX2-RV64-NEXT: addi a1, a1, 1365
; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1
; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v8
@@ -453,21 +453,21 @@ define void @bitreverse_v16i16(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10
; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4
; LMULMAX1-RV64-NEXT: lui a2, 1
-; LMULMAX1-RV64-NEXT: addiw a2, a2, -241
+; LMULMAX1-RV64-NEXT: addi a2, a2, -241
; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2
; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2
; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 4
; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8
; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2
; LMULMAX1-RV64-NEXT: lui a3, 3
-; LMULMAX1-RV64-NEXT: addiw a3, a3, 819
+; LMULMAX1-RV64-NEXT: addi a3, a3, 819
; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3
; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3
; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8
; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1
; LMULMAX1-RV64-NEXT: lui a4, 5
-; LMULMAX1-RV64-NEXT: addiw a4, a4, 1365
+; LMULMAX1-RV64-NEXT: addi a4, a4, 1365
; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4
; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4
; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8
@@ -555,7 +555,7 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) {
; LMULMAX2-RV64-NEXT: vle32.v v8, (a0)
; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 8
; LMULMAX2-RV64-NEXT: lui a1, 16
-; LMULMAX2-RV64-NEXT: addiw a1, a1, -256
+; LMULMAX2-RV64-NEXT: addi a1, a1, -256
; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1
; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24
; LMULMAX2-RV64-NEXT: vor.vv v10, v10, v12
@@ -566,21 +566,21 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) {
; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10
; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4
; LMULMAX2-RV64-NEXT: lui a1, 61681
-; LMULMAX2-RV64-NEXT: addiw a1, a1, -241
+; LMULMAX2-RV64-NEXT: addi a1, a1, -241
; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1
; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 4
; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8
; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 2
; LMULMAX2-RV64-NEXT: lui a1, 209715
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 819
+; LMULMAX2-RV64-NEXT: addi a1, a1, 819
; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1
; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 2
; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8
; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1
; LMULMAX2-RV64-NEXT: lui a1, 349525
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365
+; LMULMAX2-RV64-NEXT: addi a1, a1, 1365
; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1
; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v8
@@ -662,7 +662,7 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vle32.v v9, (a0)
; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 8
; LMULMAX1-RV64-NEXT: lui a2, 16
-; LMULMAX1-RV64-NEXT: addiw a2, a2, -256
+; LMULMAX1-RV64-NEXT: addi a2, a2, -256
; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2
; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24
; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11
@@ -673,21 +673,21 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10
; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4
; LMULMAX1-RV64-NEXT: lui a3, 61681
-; LMULMAX1-RV64-NEXT: addiw a3, a3, -241
+; LMULMAX1-RV64-NEXT: addi a3, a3, -241
; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3
; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3
; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 4
; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8
; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2
; LMULMAX1-RV64-NEXT: lui a4, 209715
-; LMULMAX1-RV64-NEXT: addiw a4, a4, 819
+; LMULMAX1-RV64-NEXT: addi a4, a4, 819
; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4
; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4
; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8
; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1
; LMULMAX1-RV64-NEXT: lui a5, 349525
-; LMULMAX1-RV64-NEXT: addiw a5, a5, 1365
+; LMULMAX1-RV64-NEXT: addi a5, a5, 1365
; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a5
; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5
; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
index 050fa3bdaa7c9a5..22061040ddbc1c2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
@@ -119,73 +119,41 @@ define <16 x i16> @vp_bswap_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
declare <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32>, <2 x i1>, i32)
define <2 x i32> @vp_bswap_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_v2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t
-; RV32-NEXT: vor.vv v9, v9, v10, v0.t
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v10, v10, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bswap_v2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 24, v0.t
-; RV64-NEXT: vor.vv v9, v9, v10, v0.t
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v10, v10, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bswap_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
%v = call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
ret <2 x i32> %v
}
define <2 x i32> @vp_bswap_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_v2i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsrl.vi v10, v8, 24
-; RV32-NEXT: vor.vv v9, v9, v10
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsll.vi v10, v10, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bswap_v2i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsrl.vi v10, v8, 24
-; RV64-NEXT: vor.vv v9, v9, v10
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsll.vi v10, v10, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bswap_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsrl.vi v10, v8, 24
+; CHECK-NEXT: vor.vv v9, v9, v10
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsll.vi v10, v10, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
@@ -195,73 +163,41 @@ define <2 x i32> @vp_bswap_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
declare <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32>, <4 x i1>, i32)
define <4 x i32> @vp_bswap_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_v4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t
-; RV32-NEXT: vor.vv v9, v9, v10, v0.t
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v10, v10, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bswap_v4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 24, v0.t
-; RV64-NEXT: vor.vv v9, v9, v10, v0.t
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v10, v10, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bswap_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
%v = call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
ret <4 x i32> %v
}
define <4 x i32> @vp_bswap_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_v4i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsrl.vi v10, v8, 24
-; RV32-NEXT: vor.vv v9, v9, v10
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsll.vi v10, v10, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bswap_v4i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsrl.vi v10, v8, 24
-; RV64-NEXT: vor.vv v9, v9, v10
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsll.vi v10, v10, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bswap_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsrl.vi v10, v8, 24
+; CHECK-NEXT: vor.vv v9, v9, v10
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsll.vi v10, v10, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
@@ -271,73 +207,41 @@ define <4 x i32> @vp_bswap_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
declare <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32>, <8 x i1>, i32)
define <8 x i32> @vp_bswap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_v8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t
-; RV32-NEXT: vor.vv v10, v10, v12, v0.t
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v12, v12, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bswap_v8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 24, v0.t
-; RV64-NEXT: vor.vv v10, v10, v12, v0.t
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v12, v12, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bswap_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v10, v10, v12, v0.t
+; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v12, v12, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
%v = call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
ret <8 x i32> %v
}
define <8 x i32> @vp_bswap_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_v8i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsrl.vi v12, v8, 24
-; RV32-NEXT: vor.vv v10, v10, v12
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsll.vi v12, v12, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bswap_v8i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsrl.vi v12, v8, 24
-; RV64-NEXT: vor.vv v10, v10, v12
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsll.vi v12, v12, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bswap_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsrl.vi v12, v8, 24
+; CHECK-NEXT: vor.vv v10, v10, v12
+; CHECK-NEXT: vand.vx v12, v8, a0
+; CHECK-NEXT: vsll.vi v12, v12, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
@@ -347,73 +251,41 @@ define <8 x i32> @vp_bswap_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
declare <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32>, <16 x i1>, i32)
define <16 x i32> @vp_bswap_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_v16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t
-; RV32-NEXT: vor.vv v12, v12, v16, v0.t
-; RV32-NEXT: vand.vx v16, v8, a0, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
-; RV32-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bswap_v16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t
-; RV64-NEXT: vor.vv v12, v12, v16, v0.t
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsll.vi v16, v16, 8, v0.t
-; RV64-NEXT: vsll.vi v8, v8, 24, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bswap_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vsrl.vi v16, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v12, v12, v16, v0.t
+; CHECK-NEXT: vand.vx v16, v8, a0, v0.t
+; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
%v = call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
ret <16 x i32> %v
}
define <16 x i32> @vp_bswap_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_bswap_v16i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsrl.vi v16, v8, 24
-; RV32-NEXT: vor.vv v12, v12, v16
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsll.vi v16, v16, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_bswap_v16i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 8
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsrl.vi v16, v8, 24
-; RV64-NEXT: vor.vv v12, v12, v16
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsll.vi v16, v16, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_bswap_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 8
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -256
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vsrl.vi v16, v8, 24
+; CHECK-NEXT: vor.vv v12, v12, v16
+; CHECK-NEXT: vand.vx v16, v8, a0
+; CHECK-NEXT: vsll.vi v16, v16, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
index 4d78da2d6476035..628a3e072abcd01 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
@@ -33,41 +33,23 @@ define void @bswap_v8i16(ptr %x, ptr %y) {
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
define void @bswap_v4i32(ptr %x, ptr %y) {
-; RV32-LABEL: bswap_v4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: lui a1, 16
-; RV32-NEXT: addi a1, a1, -256
-; RV32-NEXT: vand.vx v9, v9, a1
-; RV32-NEXT: vsrl.vi v10, v8, 24
-; RV32-NEXT: vor.vv v9, v9, v10
-; RV32-NEXT: vand.vx v10, v8, a1
-; RV32-NEXT: vsll.vi v10, v10, 8
-; RV32-NEXT: vsll.vi v8, v8, 24
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: bswap_v4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: lui a1, 16
-; RV64-NEXT: addiw a1, a1, -256
-; RV64-NEXT: vand.vx v9, v9, a1
-; RV64-NEXT: vsrl.vi v10, v8, 24
-; RV64-NEXT: vor.vv v9, v9, v10
-; RV64-NEXT: vand.vx v10, v8, a1
-; RV64-NEXT: vsll.vi v10, v10, 8
-; RV64-NEXT: vsll.vi v8, v8, 24
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: bswap_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: lui a1, 16
+; CHECK-NEXT: addi a1, a1, -256
+; CHECK-NEXT: vand.vx v9, v9, a1
+; CHECK-NEXT: vsrl.vi v10, v8, 24
+; CHECK-NEXT: vor.vv v9, v9, v10
+; CHECK-NEXT: vand.vx v10, v8, a1
+; CHECK-NEXT: vsll.vi v10, v10, 8
+; CHECK-NEXT: vsll.vi v8, v8, 24
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
;
; ZVKB-LABEL: bswap_v4i32:
; ZVKB: # %bb.0:
@@ -269,7 +251,7 @@ define void @bswap_v8i32(ptr %x, ptr %y) {
; LMULMAX2-RV64-NEXT: vle32.v v8, (a0)
; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 8
; LMULMAX2-RV64-NEXT: lui a1, 16
-; LMULMAX2-RV64-NEXT: addiw a1, a1, -256
+; LMULMAX2-RV64-NEXT: addi a1, a1, -256
; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1
; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24
; LMULMAX2-RV64-NEXT: vor.vv v10, v10, v12
@@ -319,7 +301,7 @@ define void @bswap_v8i32(ptr %x, ptr %y) {
; LMULMAX1-RV64-NEXT: vle32.v v9, (a0)
; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 8
; LMULMAX1-RV64-NEXT: lui a2, 16
-; LMULMAX1-RV64-NEXT: addiw a2, a2, -256
+; LMULMAX1-RV64-NEXT: addi a2, a2, -256
; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2
; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24
; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
index 9e4584eb17ff9a7..2bbc04172bd1421 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
@@ -236,7 +236,7 @@ define <4 x i32> @add_constant_rhs_with_identity(i32 %a, i32 %b, i32 %c, i32 %d)
; RV64: # %bb.0:
; RV64-NEXT: addiw a1, a1, 25
; RV64-NEXT: addiw a2, a2, 1
-; RV64-NEXT: addiw a3, a3, 2047
+; RV64-NEXT: addi a3, a3, 2047
; RV64-NEXT: addiw a3, a3, 308
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vslide1down.vx v8, v8, a0
@@ -273,7 +273,7 @@ define <4 x i32> @add_constant_rhs_identity(i32 %a, i32 %b, i32 %c, i32 %d) {
; RV64: # %bb.0:
; RV64-NEXT: addiw a1, a1, 25
; RV64-NEXT: addiw a2, a2, 1
-; RV64-NEXT: addiw a3, a3, 2047
+; RV64-NEXT: addi a3, a3, 2047
; RV64-NEXT: addiw a3, a3, 308
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vslide1down.vx v8, v8, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
index e53877f53833fe6..d47971ef5a13ca6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
@@ -247,141 +247,75 @@ define <16 x i8> @vp_ctlz_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
declare <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)
define <2 x i16> @vp_ctlz_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_v2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 false, <2 x i1> %m, i32 %evl)
ret <2 x i16> %v
}
define <2 x i16> @vp_ctlz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v2i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_v2i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 false, <2 x i1> %m, i32 %evl)
@@ -391,141 +325,75 @@ define <2 x i16> @vp_ctlz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
declare <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)
define <4 x i16> @vp_ctlz_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_v4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 false, <4 x i1> %m, i32 %evl)
ret <4 x i16> %v
}
define <4 x i16> @vp_ctlz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v4i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_v4i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 false, <4 x i1> %m, i32 %evl)
@@ -535,141 +403,75 @@ define <4 x i16> @vp_ctlz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
declare <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)
define <8 x i16> @vp_ctlz_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v8i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_v8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 false, <8 x i1> %m, i32 %evl)
ret <8 x i16> %v
}
define <8 x i16> @vp_ctlz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v8i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_v8i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 false, <8 x i1> %m, i32 %evl)
@@ -679,141 +481,75 @@ define <8 x i16> @vp_ctlz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
declare <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)
define <16 x i16> @vp_ctlz_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v16i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_v16i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 false, <16 x i1> %m, i32 %evl)
ret <16 x i16> %v
}
define <16 x i16> @vp_ctlz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v16i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_v16i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 false, <16 x i1> %m, i32 %evl)
@@ -823,153 +559,81 @@ define <16 x i16> @vp_ctlz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
declare <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)
define <2 x i32> @vp_ctlz_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_v2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 16, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
%v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 false, <2 x i1> %m, i32 %evl)
ret <2 x i32> %v
}
define <2 x i32> @vp_ctlz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v2i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_v2i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 16
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 false, <2 x i1> %m, i32 %evl)
@@ -979,9 +643,261 @@ define <2 x i32> @vp_ctlz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
declare <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)
define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v4i32:
+; CHECK-LABEL: vp_ctlz_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 16, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vp_ctlz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 16
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+declare <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)
+
+define <8 x i32> @vp_ctlz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 16, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vp_ctlz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 16
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+declare <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)
+
+define <16 x i32> @vp_ctlz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 16, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vp_ctlz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vsrl.vi v12, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vsrl.vi v12, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vsrl.vi v12, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vsrl.vi v12, v8, 16
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vsub.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+declare <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
+
+define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v2i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v8, v9, v0.t
; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
@@ -992,32 +908,48 @@ define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-NEXT: vor.vv v8, v8, v9, v0.t
; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t
; RV32-NEXT: vor.vv v8, v8, v9, v0.t
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsrl.vx v9, v8, a1, v0.t
+; RV32-NEXT: vor.vv v8, v8, v9, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT: vand.vv v9, v9, v10, v0.t
; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT: vand.vv v10, v8, v9, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
+; RV32-NEXT: vand.vv v8, v8, v9, v0.t
+; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT: vand.vv v8, v8, v9, v0.t
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT: vmul.vv v8, v8, v9, v0.t
+; RV32-NEXT: li a0, 56
+; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: vp_ctlz_v4i32:
+; RV64-LABEL: vp_ctlz_v2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
; RV64-NEXT: vor.vv v8, v8, v9, v0.t
; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
@@ -1028,14 +960,21 @@ define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV64-NEXT: vor.vv v8, v8, v9, v0.t
; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t
; RV64-NEXT: vor.vv v8, v8, v9, v0.t
+; RV64-NEXT: li a0, 32
+; RV64-NEXT: vsrl.vx v9, v8, a0, v0.t
+; RV64-NEXT: vor.vv v8, v8, v9, v0.t
; RV64-NEXT: vnot.v v8, v8, v0.t
; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v9, v9, a0, v0.t
; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v9, v8, a0, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
@@ -1044,20 +983,25 @@ define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: lui a0, 4112
; RV64-NEXT: addiw a0, a0, 257
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
+; RV64-NEXT: li a0, 56
+; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT: ret
- %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl)
- ret <4 x i32> %v
+ %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
}
-define <4 x i32> @vp_ctlz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v4i32_unmasked:
+define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v2i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsrl.vi v9, v8, 1
; RV32-NEXT: vor.vv v8, v8, v9
; RV32-NEXT: vsrl.vi v9, v8, 2
@@ -1068,32 +1012,48 @@ define <4 x i32> @vp_ctlz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
; RV32-NEXT: vor.vv v8, v8, v9
; RV32-NEXT: vsrl.vi v9, v8, 16
; RV32-NEXT: vor.vv v8, v8, v9
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsrl.vx v9, v8, a1
+; RV32-NEXT: vor.vv v8, v8, v9
; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT: vand.vv v9, v9, v10
; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT: vand.vv v10, v8, v9
; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
+; RV32-NEXT: vand.vv v8, v8, v9
+; RV32-NEXT: vadd.vv v8, v10, v8
; RV32-NEXT: vsrl.vi v9, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT: vand.vv v8, v8, v9
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT: vmul.vv v8, v8, v9
+; RV32-NEXT: li a0, 56
+; RV32-NEXT: vsrl.vx v8, v8, a0
; RV32-NEXT: ret
;
-; RV64-LABEL: vp_ctlz_v4i32_unmasked:
+; RV64-LABEL: vp_ctlz_v2i64_unmasked:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT: vsrl.vi v9, v8, 1
; RV64-NEXT: vor.vv v8, v8, v9
; RV64-NEXT: vsrl.vi v9, v8, 2
@@ -1104,14 +1064,21 @@ define <4 x i32> @vp_ctlz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
; RV64-NEXT: vor.vv v8, v8, v9
; RV64-NEXT: vsrl.vi v9, v8, 16
; RV64-NEXT: vor.vv v8, v8, v9
+; RV64-NEXT: li a0, 32
+; RV64-NEXT: vsrl.vx v9, v8, a0
+; RV64-NEXT: vor.vv v8, v8, v9
; RV64-NEXT: vnot.v v8, v8
; RV64-NEXT: vsrl.vi v9, v8, 1
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v9, v9, a0
; RV64-NEXT: vsub.vv v8, v8, v9
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v9, v8, a0
; RV64-NEXT: vsrl.vi v8, v8, 2
; RV64-NEXT: vand.vx v8, v8, a0
@@ -1120,24 +1087,29 @@ define <4 x i32> @vp_ctlz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
; RV64-NEXT: vadd.vv v8, v8, v9
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: lui a0, 4112
; RV64-NEXT: addiw a0, a0, 257
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
+; RV64-NEXT: li a0, 56
+; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl)
- ret <4 x i32> %v
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
}
-declare <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)
+declare <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
-define <8 x i32> @vp_ctlz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v8i32:
+define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v4i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v8, v10, v0.t
; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
@@ -1148,32 +1120,48 @@ define <8 x i32> @vp_ctlz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-NEXT: vor.vv v8, v8, v10, v0.t
; RV32-NEXT: vsrl.vi v10, v8, 16, v0.t
; RV32-NEXT: vor.vv v8, v8, v10, v0.t
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t
+; RV32-NEXT: vor.vv v8, v8, v10, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT: vand.vv v10, v10, v12, v0.t
; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT: vand.vv v12, v8, v10, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
+; RV32-NEXT: vand.vv v8, v8, v10, v0.t
+; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT: vand.vv v8, v8, v10, v0.t
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT: vmul.vv v8, v8, v10, v0.t
+; RV32-NEXT: li a0, 56
+; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: vp_ctlz_v8i32:
+; RV64-LABEL: vp_ctlz_v4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
; RV64-NEXT: vor.vv v8, v8, v10, v0.t
; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t
@@ -1184,14 +1172,21 @@ define <8 x i32> @vp_ctlz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV64-NEXT: vor.vv v8, v8, v10, v0.t
; RV64-NEXT: vsrl.vi v10, v8, 16, v0.t
; RV64-NEXT: vor.vv v8, v8, v10, v0.t
+; RV64-NEXT: li a0, 32
+; RV64-NEXT: vsrl.vx v10, v8, a0, v0.t
+; RV64-NEXT: vor.vv v8, v8, v10, v0.t
; RV64-NEXT: vnot.v v8, v8, v0.t
; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v10, v10, a0, v0.t
; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v10, v8, a0, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
@@ -1200,20 +1195,25 @@ define <8 x i32> @vp_ctlz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: lui a0, 4112
; RV64-NEXT: addiw a0, a0, 257
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
+; RV64-NEXT: li a0, 56
+; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT: ret
- %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl)
- ret <8 x i32> %v
+ %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
}
-define <8 x i32> @vp_ctlz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v8i32_unmasked:
+define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v4i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsrl.vi v10, v8, 1
; RV32-NEXT: vor.vv v8, v8, v10
; RV32-NEXT: vsrl.vi v10, v8, 2
@@ -1224,32 +1224,48 @@ define <8 x i32> @vp_ctlz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
; RV32-NEXT: vor.vv v8, v8, v10
; RV32-NEXT: vsrl.vi v10, v8, 16
; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsrl.vx v10, v8, a1
+; RV32-NEXT: vor.vv v8, v8, v10
; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT: vand.vv v10, v10, v12
; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT: vand.vv v12, v8, v10
; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
+; RV32-NEXT: vand.vv v8, v8, v10
+; RV32-NEXT: vadd.vv v8, v12, v8
; RV32-NEXT: vsrl.vi v10, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT: vand.vv v8, v8, v10
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT: vmul.vv v8, v8, v10
+; RV32-NEXT: li a0, 56
+; RV32-NEXT: vsrl.vx v8, v8, a0
; RV32-NEXT: ret
;
-; RV64-LABEL: vp_ctlz_v8i32_unmasked:
+; RV64-LABEL: vp_ctlz_v4i64_unmasked:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT: vsrl.vi v10, v8, 1
; RV64-NEXT: vor.vv v8, v8, v10
; RV64-NEXT: vsrl.vi v10, v8, 2
@@ -1260,14 +1276,21 @@ define <8 x i32> @vp_ctlz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
; RV64-NEXT: vor.vv v8, v8, v10
; RV64-NEXT: vsrl.vi v10, v8, 16
; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: li a0, 32
+; RV64-NEXT: vsrl.vx v10, v8, a0
+; RV64-NEXT: vor.vv v8, v8, v10
; RV64-NEXT: vnot.v v8, v8
; RV64-NEXT: vsrl.vi v10, v8, 1
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v10, v10, a0
; RV64-NEXT: vsub.vv v8, v8, v10
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v10, v8, a0
; RV64-NEXT: vsrl.vi v8, v8, 2
; RV64-NEXT: vand.vx v8, v8, a0
@@ -1276,24 +1299,29 @@ define <8 x i32> @vp_ctlz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
; RV64-NEXT: vadd.vv v8, v8, v10
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: lui a0, 4112
; RV64-NEXT: addiw a0, a0, 257
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
+; RV64-NEXT: li a0, 56
+; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl)
- ret <8 x i32> %v
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
}
-declare <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)
+declare <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
-define <16 x i32> @vp_ctlz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v16i32:
+define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v8i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v8, v12, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t
@@ -1304,32 +1332,48 @@ define <16 x i32> @vp_ctlz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl
; RV32-NEXT: vor.vv v8, v8, v12, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 16, v0.t
; RV32-NEXT: vor.vv v8, v8, v12, v0.t
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t
+; RV32-NEXT: vor.vv v8, v8, v12, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT: vand.vv v12, v12, v16, v0.t
; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT: vand.vv v16, v8, v12, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
+; RV32-NEXT: vand.vv v8, v8, v12, v0.t
+; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT: vand.vv v8, v8, v12, v0.t
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT: vmul.vv v8, v8, v12, v0.t
+; RV32-NEXT: li a0, 56
+; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: vp_ctlz_v16i32:
+; RV64-LABEL: vp_ctlz_v8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
; RV64-NEXT: vor.vv v8, v8, v12, v0.t
; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t
@@ -1340,14 +1384,21 @@ define <16 x i32> @vp_ctlz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl
; RV64-NEXT: vor.vv v8, v8, v12, v0.t
; RV64-NEXT: vsrl.vi v12, v8, 16, v0.t
; RV64-NEXT: vor.vv v8, v8, v12, v0.t
+; RV64-NEXT: li a0, 32
+; RV64-NEXT: vsrl.vx v12, v8, a0, v0.t
+; RV64-NEXT: vor.vv v8, v8, v12, v0.t
; RV64-NEXT: vnot.v v8, v8, v0.t
; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v12, v12, a0, v0.t
; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v12, v8, a0, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
@@ -1356,20 +1407,25 @@ define <16 x i32> @vp_ctlz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl
; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
; RV64-NEXT: lui a0, 4112
; RV64-NEXT: addiw a0, a0, 257
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
+; RV64-NEXT: li a0, 56
+; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT: ret
- %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl)
- ret <16 x i32> %v
+ %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
}
-define <16 x i32> @vp_ctlz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v16i32_unmasked:
+define <8 x i64> @vp_ctlz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v8i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsrl.vi v12, v8, 1
; RV32-NEXT: vor.vv v8, v8, v12
; RV32-NEXT: vsrl.vi v12, v8, 2
@@ -1380,32 +1436,48 @@ define <16 x i32> @vp_ctlz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
; RV32-NEXT: vor.vv v8, v8, v12
; RV32-NEXT: vsrl.vi v12, v8, 16
; RV32-NEXT: vor.vv v8, v8, v12
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsrl.vx v12, v8, a1
+; RV32-NEXT: vor.vv v8, v8, v12
; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT: vand.vv v12, v12, v16
; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT: vand.vv v16, v8, v12
; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v12, v8
+; RV32-NEXT: vand.vv v8, v8, v12
+; RV32-NEXT: vadd.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v12, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT: vand.vv v8, v8, v12
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT: vmul.vv v8, v8, v12
+; RV32-NEXT: li a0, 56
+; RV32-NEXT: vsrl.vx v8, v8, a0
; RV32-NEXT: ret
;
-; RV64-LABEL: vp_ctlz_v16i32_unmasked:
+; RV64-LABEL: vp_ctlz_v8i64_unmasked:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT: vsrl.vi v12, v8, 1
; RV64-NEXT: vor.vv v8, v8, v12
; RV64-NEXT: vsrl.vi v12, v8, 2
@@ -1416,14 +1488,21 @@ define <16 x i32> @vp_ctlz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
; RV64-NEXT: vor.vv v8, v8, v12
; RV64-NEXT: vsrl.vi v12, v8, 16
; RV64-NEXT: vor.vv v8, v8, v12
+; RV64-NEXT: li a0, 32
+; RV64-NEXT: vsrl.vx v12, v8, a0
+; RV64-NEXT: vor.vv v8, v8, v12
; RV64-NEXT: vnot.v v8, v8
; RV64-NEXT: vsrl.vi v12, v8, 1
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v12, v12, a0
; RV64-NEXT: vsub.vv v8, v8, v12
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v12, v8, a0
; RV64-NEXT: vsrl.vi v8, v8, 2
; RV64-NEXT: vand.vx v8, v8, a0
@@ -1432,107 +1511,127 @@ define <16 x i32> @vp_ctlz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
; RV64-NEXT: vadd.vv v8, v8, v12
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: lui a0, 4112
; RV64-NEXT: addiw a0, a0, 257
+; RV64-NEXT: slli a1, a0, 32
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
+; RV64-NEXT: li a0, 56
+; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl)
- ret <16 x i32> %v
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
}
-declare <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
+declare <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32)
-define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v2i64:
+define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v15i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v9, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v10, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v9, v9, v10, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
+; RV32-NEXT: sw a1, 28(sp)
+; RV32-NEXT: sw a1, 24(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v9, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v10, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
+; RV32-NEXT: sw a1, 20(sp)
+; RV32-NEXT: sw a1, 16(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v9, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v9, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v9, v0.t
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT: vor.vv v8, v8, v16, v0.t
+; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
+; RV32-NEXT: vor.vv v8, v8, v16, v0.t
+; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT: vor.vv v8, v8, v16, v0.t
+; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
+; RV32-NEXT: vor.vv v8, v8, v16, v0.t
+; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
+; RV32-NEXT: vor.vv v8, v8, v16, v0.t
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
+; RV32-NEXT: vor.vv v8, v8, v16, v0.t
+; RV32-NEXT: vnot.v v8, v8, v0.t
+; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT: addi a1, sp, 24
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v24, (a1), zero
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vand.vv v16, v16, v24, v0.t
+; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vand.vv v24, v8, v16, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT: vand.vv v8, v8, v16, v0.t
+; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
+; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vand.vv v8, v8, v16, v0.t
+; RV32-NEXT: mv a1, sp
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
+; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
-; RV64-LABEL: vp_ctlz_v2i64:
+; RV64-LABEL: vp_ctlz_v15i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
+; RV64-NEXT: vor.vv v8, v8, v16, v0.t
+; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
+; RV64-NEXT: vor.vv v8, v8, v16, v0.t
+; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT: vor.vv v8, v8, v16, v0.t
+; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
+; RV64-NEXT: vor.vv v8, v8, v16, v0.t
+; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
+; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v9, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
+; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t
+; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
+; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
+; RV64-NEXT: vand.vx v16, v16, a0, v0.t
+; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
+; RV64-NEXT: vand.vx v16, v8, a0, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
+; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
+; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: slli a1, a0, 32
@@ -1546,97 +1645,112 @@ define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV64-NEXT: li a0, 56
; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT: ret
- %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl)
- ret <2 x i64> %v
+ %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl)
+ ret <15 x i64> %v
}
-define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v2i64_unmasked:
+define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v15i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v9, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v10, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v9, v9, v10
-; RV32-NEXT: vsub.vv v8, v8, v9
+; RV32-NEXT: sw a1, 28(sp)
+; RV32-NEXT: sw a1, 24(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v9, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v10, v8, v9
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
+; RV32-NEXT: sw a1, 20(sp)
+; RV32-NEXT: sw a1, 16(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v9, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v9
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v9, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v9
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_v2i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v9, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vsrl.vi v16, v8, 1
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: vsrl.vi v16, v8, 2
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: vsrl.vi v16, v8, 4
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: vsrl.vi v16, v8, 8
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: vsrl.vi v16, v8, 16
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsrl.vx v16, v8, a1
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: vnot.v v8, v8
+; RV32-NEXT: vsrl.vi v16, v8, 1
+; RV32-NEXT: addi a1, sp, 24
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v24, (a1), zero
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vand.vv v16, v16, v24
+; RV32-NEXT: vsub.vv v8, v8, v16
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vand.vv v24, v8, v16
+; RV32-NEXT: vsrl.vi v8, v8, 2
+; RV32-NEXT: vand.vv v8, v8, v16
+; RV32-NEXT: vadd.vv v8, v24, v8
+; RV32-NEXT: vsrl.vi v16, v8, 4
+; RV32-NEXT: vadd.vv v8, v8, v16
+; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vand.vv v8, v8, v16
+; RV32-NEXT: mv a1, sp
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vmul.vv v8, v8, v16
+; RV32-NEXT: li a0, 56
+; RV32-NEXT: vsrl.vx v8, v8, a0
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vp_ctlz_v15i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT: vsrl.vi v16, v8, 1
+; RV64-NEXT: vor.vv v8, v8, v16
+; RV64-NEXT: vsrl.vi v16, v8, 2
+; RV64-NEXT: vor.vv v8, v8, v16
+; RV64-NEXT: vsrl.vi v16, v8, 4
+; RV64-NEXT: vor.vv v8, v8, v16
+; RV64-NEXT: vsrl.vi v16, v8, 8
+; RV64-NEXT: vor.vv v8, v8, v16
+; RV64-NEXT: vsrl.vi v16, v8, 16
+; RV64-NEXT: vor.vv v8, v8, v16
+; RV64-NEXT: li a0, 32
+; RV64-NEXT: vsrl.vx v16, v8, a0
+; RV64-NEXT: vor.vv v8, v8, v16
+; RV64-NEXT: vnot.v v8, v8
+; RV64-NEXT: vsrl.vi v16, v8, 1
+; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
+; RV64-NEXT: vand.vx v16, v16, a0
+; RV64-NEXT: vsub.vv v8, v8, v16
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v8, a0
+; RV64-NEXT: vand.vx v16, v8, a0
; RV64-NEXT: vsrl.vi v8, v8, 2
; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
+; RV64-NEXT: vadd.vv v8, v16, v8
+; RV64-NEXT: vsrl.vi v16, v8, 4
+; RV64-NEXT: vadd.vv v8, v8, v16
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: slli a1, a0, 32
@@ -1650,101 +1764,116 @@ define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV64-NEXT: li a0, 56
; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl)
- ret <2 x i64> %v
+ %head = insertelement <15 x i1> poison, i1 true, i32 0
+ %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
+ %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl)
+ ret <15 x i64> %v
}
-declare <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
+declare <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)
-define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v4i64:
+define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v16i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v12, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v10, v10, v12, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
+; RV32-NEXT: sw a1, 28(sp)
+; RV32-NEXT: sw a1, 24(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v10, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v12, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
+; RV32-NEXT: sw a1, 20(sp)
+; RV32-NEXT: sw a1, 16(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v10, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v10, v0.t
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v10, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v10, v0.t
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT: vor.vv v8, v8, v16, v0.t
+; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
+; RV32-NEXT: vor.vv v8, v8, v16, v0.t
+; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT: vor.vv v8, v8, v16, v0.t
+; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
+; RV32-NEXT: vor.vv v8, v8, v16, v0.t
+; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
+; RV32-NEXT: vor.vv v8, v8, v16, v0.t
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
+; RV32-NEXT: vor.vv v8, v8, v16, v0.t
+; RV32-NEXT: vnot.v v8, v8, v0.t
+; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT: addi a1, sp, 24
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v24, (a1), zero
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vand.vv v16, v16, v24, v0.t
+; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vand.vv v24, v8, v16, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT: vand.vv v8, v8, v16, v0.t
+; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
+; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vand.vv v8, v8, v16, v0.t
+; RV32-NEXT: mv a1, sp
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
+; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
-; RV64-LABEL: vp_ctlz_v4i64:
+; RV64-LABEL: vp_ctlz_v16i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
+; RV64-NEXT: vor.vv v8, v8, v16, v0.t
+; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
+; RV64-NEXT: vor.vv v8, v8, v16, v0.t
+; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT: vor.vv v8, v8, v16, v0.t
+; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
+; RV64-NEXT: vor.vv v8, v8, v16, v0.t
+; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
+; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v10, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
+; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t
+; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
+; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
+; RV64-NEXT: vand.vx v16, v16, a0, v0.t
+; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
+; RV64-NEXT: vand.vx v16, v8, a0, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
+; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
+; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: slli a1, a0, 32
@@ -1758,97 +1887,112 @@ define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV64-NEXT: li a0, 56
; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT: ret
- %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl)
- ret <4 x i64> %v
+ %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
}
-define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v4i64_unmasked:
+define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v16i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v10, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v10, v8, 1
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v12, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v10, v10, v12
-; RV32-NEXT: vsub.vv v8, v8, v10
+; RV32-NEXT: sw a1, 28(sp)
+; RV32-NEXT: sw a1, 24(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v10, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v12, v8, v10
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
+; RV32-NEXT: sw a1, 20(sp)
+; RV32-NEXT: sw a1, 16(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v10, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v10
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v10, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v10
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vsrl.vi v16, v8, 1
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: vsrl.vi v16, v8, 2
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: vsrl.vi v16, v8, 4
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: vsrl.vi v16, v8, 8
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: vsrl.vi v16, v8, 16
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsrl.vx v16, v8, a1
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: vnot.v v8, v8
+; RV32-NEXT: vsrl.vi v16, v8, 1
+; RV32-NEXT: addi a1, sp, 24
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v24, (a1), zero
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vand.vv v16, v16, v24
+; RV32-NEXT: vsub.vv v8, v8, v16
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vand.vv v24, v8, v16
+; RV32-NEXT: vsrl.vi v8, v8, 2
+; RV32-NEXT: vand.vv v8, v8, v16
+; RV32-NEXT: vadd.vv v8, v24, v8
+; RV32-NEXT: vsrl.vi v16, v8, 4
+; RV32-NEXT: vadd.vv v8, v8, v16
+; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vand.vv v8, v8, v16
+; RV32-NEXT: mv a1, sp
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vmul.vv v8, v8, v16
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
+; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
-; RV64-LABEL: vp_ctlz_v4i64_unmasked:
+; RV64-LABEL: vp_ctlz_v16i64_unmasked:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT: vsrl.vi v16, v8, 1
+; RV64-NEXT: vor.vv v8, v8, v16
+; RV64-NEXT: vsrl.vi v16, v8, 2
+; RV64-NEXT: vor.vv v8, v8, v16
+; RV64-NEXT: vsrl.vi v16, v8, 4
+; RV64-NEXT: vor.vv v8, v8, v16
+; RV64-NEXT: vsrl.vi v16, v8, 8
+; RV64-NEXT: vor.vv v8, v8, v16
+; RV64-NEXT: vsrl.vi v16, v8, 16
+; RV64-NEXT: vor.vv v8, v8, v16
; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v10, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vsrl.vx v16, v8, a0
+; RV64-NEXT: vor.vv v8, v8, v16
; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v10, v8, 1
+; RV64-NEXT: vsrl.vi v16, v8, 1
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: addiw a0, a0, 1365
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
+; RV64-NEXT: vand.vx v16, v16, a0
+; RV64-NEXT: vsub.vv v8, v8, v16
; RV64-NEXT: lui a0, 209715
; RV64-NEXT: addiw a0, a0, 819
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v8, a0
+; RV64-NEXT: vand.vx v16, v8, a0
; RV64-NEXT: vsrl.vi v8, v8, 2
; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
+; RV64-NEXT: vadd.vv v8, v16, v8
+; RV64-NEXT: vsrl.vi v16, v8, 4
+; RV64-NEXT: vadd.vv v8, v8, v16
; RV64-NEXT: lui a0, 61681
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: slli a1, a0, 32
@@ -1862,248 +2006,54 @@ define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV64-NEXT: li a0, 56
; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl)
- ret <4 x i64> %v
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
}
-declare <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
+declare <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64>, i1 immarg, <32 x i1>, i32)
-define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v8i64:
+define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v32i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: .cfi_def_cfa_offset 48
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 56
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vi v24, v0, 2
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT: vmv.v.x v16, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v12, v12, v16, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
+; RV32-NEXT: sw a1, 44(sp)
+; RV32-NEXT: sw a1, 40(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT: vmv.v.x v12, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v16, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
+; RV32-NEXT: sw a1, 36(sp)
+; RV32-NEXT: sw a1, 32(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT: vmv.v.x v12, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v12, v0.t
+; RV32-NEXT: sw a1, 28(sp)
+; RV32-NEXT: sw a1, 24(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT: vmv.v.x v12, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v12, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_v8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v12, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
- %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl)
- ret <8 x i64> %v
-}
-
-define <8 x i64> @vp_ctlz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v8i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v12, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT: vmv.v.x v16, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v12, v12, v16
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT: vmv.v.x v12, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v16, v8, v12
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT: vmv.v.x v12, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT: vmv.v.x v12, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v12
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_v8i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v12, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl)
- ret <8 x i64> %v
-}
-
-declare <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32)
-
-define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v15i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: sw a1, 20(sp)
+; RV32-NEXT: li a3, 16
; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: mv a2, a0
+; RV32-NEXT: bltu a0, a3, .LBB34_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: .LBB34_2:
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
@@ -2118,236 +2068,128 @@ define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl
; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 40
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 48
+; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 48
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 48
+; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 40
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
+; RV32-NEXT: vlse64.v v8, (a3), zero
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 24
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 48
+; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 48
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 48
+; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v16, v16, v8, v0.t
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 40
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 48
+; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 40
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 48
+; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 32
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT: vlse64.v v8, (a3), zero
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 48
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 48
+; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 40
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 48
+; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v16, v16, v8, v0.t
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 48
+; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 40
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 48
+; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
+; RV32-NEXT: vand.vv v16, v16, v8, v0.t
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 48
+; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT: vadd.vv v16, v8, v16, v0.t
+; RV32-NEXT: addi a3, sp, 24
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: mv a1, sp
+; RV32-NEXT: vlse64.v v8, (a3), zero
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 40
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 48
+; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vand.vv v8, v16, v8, v0.t
+; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a3), zero
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 48
+; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_v15i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
- %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl)
- ret <15 x i64> %v
-}
-
-define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v15i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_v15i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
- %head = insertelement <15 x i1> poison, i1 true, i32 0
- %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
- %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl)
- ret <15 x i64> %v
-}
-
-declare <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)
-
-define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v16i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: li a2, 56
+; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 48
+; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, a0, -16
+; RV32-NEXT: sltu a0, a0, a3
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
+; RV32-NEXT: vmv1r.v v0, v24
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t
+; RV32-NEXT: vor.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
@@ -2356,45 +2198,108 @@ define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a1, 32
; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
+; RV32-NEXT: addi a0, sp, 48
+; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 24
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v16, v8, v16, v0.t
+; RV32-NEXT: addi a0, sp, 48
+; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 48
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v16, v8, v16, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 48
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 40
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 4
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 56
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: ret
;
-; RV64-LABEL: vp_ctlz_v16i64:
+; RV64-LABEL: vp_ctlz_v32i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT: li a2, 16
+; RV64-NEXT: vslidedown.vi v24, v0, 2
+; RV64-NEXT: mv a1, a0
+; RV64-NEXT: bltu a0, a2, .LBB34_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: li a1, 16
+; RV64-NEXT: .LBB34_2:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
@@ -2405,66 +2310,121 @@ define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t
+; RV64-NEXT: li a1, 32
+; RV64-NEXT: vsrl.vx v16, v8, a1, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vnot.v v8, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
+; RV64-NEXT: lui a2, 349525
+; RV64-NEXT: addiw a2, a2, 1365
+; RV64-NEXT: slli a3, a2, 32
+; RV64-NEXT: add a2, a2, a3
+; RV64-NEXT: vand.vx v16, v16, a2, v0.t
; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
+; RV64-NEXT: lui a3, 209715
+; RV64-NEXT: addiw a3, a3, 819
+; RV64-NEXT: slli a4, a3, 32
+; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: vand.vx v16, v8, a3, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
+; RV64-NEXT: vand.vx v8, v8, a3, v0.t
; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
+; RV64-NEXT: lui a4, 61681
+; RV64-NEXT: addiw a4, a4, -241
+; RV64-NEXT: slli a5, a4, 32
+; RV64-NEXT: add a4, a4, a5
+; RV64-NEXT: vand.vx v8, v8, a4, v0.t
+; RV64-NEXT: lui a5, 4112
+; RV64-NEXT: addiw a5, a5, 257
+; RV64-NEXT: slli a6, a5, 32
+; RV64-NEXT: add a5, a5, a6
+; RV64-NEXT: vmul.vx v8, v8, a5, v0.t
+; RV64-NEXT: li a6, 56
+; RV64-NEXT: vsrl.vx v8, v8, a6, v0.t
+; RV64-NEXT: addi a7, sp, 16
+; RV64-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill
+; RV64-NEXT: addi a7, a0, -16
+; RV64-NEXT: sltu a0, a0, a7
+; RV64-NEXT: addi a0, a0, -1
+; RV64-NEXT: and a0, a0, a7
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
+; RV64-NEXT: vor.vv v16, v8, v16, v0.t
+; RV64-NEXT: vsrl.vi v8, v16, 2, v0.t
+; RV64-NEXT: vor.vv v8, v16, v8, v0.t
+; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT: vor.vv v8, v8, v16, v0.t
+; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
+; RV64-NEXT: vor.vv v8, v8, v16, v0.t
+; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
+; RV64-NEXT: vor.vv v8, v8, v16, v0.t
+; RV64-NEXT: vsrl.vx v16, v8, a1, v0.t
+; RV64-NEXT: vor.vv v8, v8, v16, v0.t
+; RV64-NEXT: vnot.v v8, v8, v0.t
+; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
+; RV64-NEXT: vand.vx v16, v16, a2, v0.t
+; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
+; RV64-NEXT: vand.vx v16, v8, a3, v0.t
+; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
+; RV64-NEXT: vand.vx v8, v8, a3, v0.t
+; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
+; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
+; RV64-NEXT: vand.vx v8, v8, a4, v0.t
+; RV64-NEXT: vmul.vx v8, v8, a5, v0.t
+; RV64-NEXT: vsrl.vx v16, v8, a6, v0.t
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
- %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl)
- ret <16 x i64> %v
+ %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl)
+ ret <32 x i64> %v
}
-define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v16i64_unmasked:
+define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctlz_v32i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: .cfi_def_cfa_offset 48
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb
+; RV32-NEXT: vmv8r.v v24, v16
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 44(sp)
+; RV32-NEXT: sw a1, 40(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 36(sp)
+; RV32-NEXT: sw a1, 32(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a1, 28(sp)
+; RV32-NEXT: sw a1, 24(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: sw a1, 20(sp)
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: bltu a0, a2, .LBB35_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: .LBB35_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1
; RV32-NEXT: vor.vv v8, v8, v16
; RV32-NEXT: vsrl.vi v16, v8, 2
@@ -2475,708 +2435,196 @@ define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: vor.vv v8, v8, v16
; RV32-NEXT: vsrl.vi v16, v8, 16
; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsrl.vx v16, v8, a2
; RV32-NEXT: vor.vv v8, v8, v16
; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: addi a1, sp, 24
+; RV32-NEXT: addi a3, sp, 40
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24
+; RV32-NEXT: vlse64.v v0, (a3), zero
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 24
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 48
+; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vand.vv v16, v16, v0
; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: addi a3, sp, 32
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16
+; RV32-NEXT: vlse64.v v0, (a3), zero
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vand.vv v16, v8, v0
; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vadd.vv v8, v24, v8
+; RV32-NEXT: vand.vv v8, v8, v0
+; RV32-NEXT: vadd.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v16, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: addi a3, sp, 24
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a3), zero
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 48
+; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: mv a1, sp
+; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a3), zero
+; RV32-NEXT: addi a3, sp, 48
+; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_v16i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl)
- ret <16 x i64> %v
-}
-
-declare <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64>, i1 immarg, <32 x i1>, i32)
-
-define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v32i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -48
-; RV32-NEXT: .cfi_def_cfa_offset 48
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 56
-; RV32-NEXT: mul a1, a1, a2
-; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 48
-; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v24, v0, 2
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
-; RV32-NEXT: sw a1, 40(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
-; RV32-NEXT: sw a1, 32(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: li a3, 16
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: mv a2, a0
-; RV32-NEXT: bltu a0, a3, .LBB34_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a2, 16
-; RV32-NEXT: .LBB34_2:
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: li a4, 40
-; RV32-NEXT: mul a3, a3, a4
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: li a4, 48
-; RV32-NEXT: mul a3, a3, a4
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: addi a3, sp, 40
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v8, (a3), zero
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: li a4, 24
-; RV32-NEXT: mul a3, a3, a4
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: li a4, 48
-; RV32-NEXT: mul a3, a3, a4
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v16, v8, v0.t
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: li a4, 40
-; RV32-NEXT: mul a3, a3, a4
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: li a4, 40
-; RV32-NEXT: mul a3, a3, a4
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: addi a3, sp, 32
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v8, (a3), zero
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: li a4, 48
-; RV32-NEXT: mul a3, a3, a4
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: li a4, 40
-; RV32-NEXT: mul a3, a3, a4
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v16, v8, v0.t
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 4
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: li a4, 40
-; RV32-NEXT: mul a3, a3, a4
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
-; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
-; RV32-NEXT: vand.vv v16, v16, v8, v0.t
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 4
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v16, v8, v16, v0.t
-; RV32-NEXT: addi a3, sp, 24
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v8, (a3), zero
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: li a4, 40
-; RV32-NEXT: mul a3, a3, a4
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v16, v8, v0.t
-; RV32-NEXT: addi a3, sp, 16
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a3), zero
+; RV32-NEXT: li a1, 56
+; RV32-NEXT: vsrl.vx v8, v8, a1
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a2, 56
-; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 4
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: addi a3, a0, -16
; RV32-NEXT: sltu a0, a0, a3
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmv1r.v v0, v24
+; RV32-NEXT: vsrl.vi v8, v24, 1
+; RV32-NEXT: vor.vv v8, v24, v8
+; RV32-NEXT: vsrl.vi v24, v8, 2
+; RV32-NEXT: vor.vv v8, v8, v24
+; RV32-NEXT: vsrl.vi v24, v8, 4
+; RV32-NEXT: vor.vv v8, v8, v24
+; RV32-NEXT: vsrl.vi v24, v8, 8
+; RV32-NEXT: vor.vv v8, v8, v24
+; RV32-NEXT: vsrl.vi v24, v8, 16
+; RV32-NEXT: vor.vv v8, v8, v24
+; RV32-NEXT: vsrl.vx v24, v8, a2
+; RV32-NEXT: vor.vv v8, v8, v24
+; RV32-NEXT: vnot.v v8, v8
+; RV32-NEXT: vsrl.vi v24, v8, 1
; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: li a2, 24
+; RV32-NEXT: mul a0, a0, a2
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t
-; RV32-NEXT: vor.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: addi a0, sp, 48
-; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vand.vv v24, v24, v16
+; RV32-NEXT: vsub.vv v8, v8, v24
+; RV32-NEXT: vand.vv v24, v8, v0
+; RV32-NEXT: vsrl.vi v8, v8, 2
+; RV32-NEXT: vand.vv v8, v8, v0
+; RV32-NEXT: vadd.vv v8, v24, v8
+; RV32-NEXT: vsrl.vi v24, v8, 4
+; RV32-NEXT: vadd.vv v8, v8, v24
; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 24
-; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v8, v16, v0.t
+; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: addi a0, sp, 48
-; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 48
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 48
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 40
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
+; RV32-NEXT: vmul.vv v8, v8, v16
+; RV32-NEXT: vsrl.vx v16, v8, a1
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 56
-; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: ret
;
-; RV64-LABEL: vp_ctlz_v32i64:
+; RV64-LABEL: vp_ctlz_v32i64_unmasked:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
-; RV64-NEXT: sub sp, sp, a1
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: li a2, 16
-; RV64-NEXT: vslidedown.vi v24, v0, 2
; RV64-NEXT: mv a1, a0
-; RV64-NEXT: bltu a0, a2, .LBB34_2
+; RV64-NEXT: bltu a0, a2, .LBB35_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
-; RV64-NEXT: .LBB34_2:
+; RV64-NEXT: .LBB35_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
+; RV64-NEXT: vsrl.vi v24, v8, 1
+; RV64-NEXT: vor.vv v8, v8, v24
+; RV64-NEXT: vsrl.vi v24, v8, 2
+; RV64-NEXT: vor.vv v8, v8, v24
+; RV64-NEXT: vsrl.vi v24, v8, 4
+; RV64-NEXT: vor.vv v8, v8, v24
+; RV64-NEXT: vsrl.vi v24, v8, 8
+; RV64-NEXT: vor.vv v8, v8, v24
+; RV64-NEXT: vsrl.vi v24, v8, 16
+; RV64-NEXT: vor.vv v8, v8, v24
; RV64-NEXT: li a1, 32
-; RV64-NEXT: vsrl.vx v16, v8, a1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
+; RV64-NEXT: vsrl.vx v24, v8, a1
+; RV64-NEXT: vor.vv v8, v8, v24
+; RV64-NEXT: vnot.v v8, v8
+; RV64-NEXT: vsrl.vi v24, v8, 1
; RV64-NEXT: lui a2, 349525
; RV64-NEXT: addiw a2, a2, 1365
; RV64-NEXT: slli a3, a2, 32
; RV64-NEXT: add a2, a2, a3
-; RV64-NEXT: vand.vx v16, v16, a2, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
+; RV64-NEXT: vand.vx v24, v24, a2
+; RV64-NEXT: vsub.vv v8, v8, v24
; RV64-NEXT: lui a3, 209715
; RV64-NEXT: addiw a3, a3, 819
; RV64-NEXT: slli a4, a3, 32
; RV64-NEXT: add a3, a3, a4
-; RV64-NEXT: vand.vx v16, v8, a3, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a3, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
+; RV64-NEXT: vand.vx v24, v8, a3
+; RV64-NEXT: vsrl.vi v8, v8, 2
+; RV64-NEXT: vand.vx v8, v8, a3
+; RV64-NEXT: vadd.vv v8, v24, v8
+; RV64-NEXT: vsrl.vi v24, v8, 4
+; RV64-NEXT: vadd.vv v8, v8, v24
; RV64-NEXT: lui a4, 61681
; RV64-NEXT: addiw a4, a4, -241
; RV64-NEXT: slli a5, a4, 32
; RV64-NEXT: add a4, a4, a5
-; RV64-NEXT: vand.vx v8, v8, a4, v0.t
+; RV64-NEXT: vand.vx v8, v8, a4
; RV64-NEXT: lui a5, 4112
; RV64-NEXT: addiw a5, a5, 257
; RV64-NEXT: slli a6, a5, 32
; RV64-NEXT: add a5, a5, a6
-; RV64-NEXT: vmul.vx v8, v8, a5, v0.t
+; RV64-NEXT: vmul.vx v8, v8, a5
; RV64-NEXT: li a6, 56
-; RV64-NEXT: vsrl.vx v8, v8, a6, v0.t
-; RV64-NEXT: addi a7, sp, 16
-; RV64-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill
+; RV64-NEXT: vsrl.vx v8, v8, a6
; RV64-NEXT: addi a7, a0, -16
; RV64-NEXT: sltu a0, a0, a7
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: and a0, a0, a7
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vmv1r.v v0, v24
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 3
-; RV64-NEXT: add a0, sp, a0
-; RV64-NEXT: addi a0, a0, 16
-; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vor.vv v16, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v8, v16, 2, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vx v16, v8, a1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vand.vx v16, v16, a2, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: vand.vx v16, v8, a3, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a3, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: vand.vx v8, v8, a4, v0.t
-; RV64-NEXT: vmul.vx v8, v8, a5, v0.t
-; RV64-NEXT: vsrl.vx v16, v8, a6, v0.t
-; RV64-NEXT: addi a0, sp, 16
-; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 4
-; RV64-NEXT: add sp, sp, a0
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: vsrl.vi v24, v16, 1
+; RV64-NEXT: vor.vv v16, v16, v24
+; RV64-NEXT: vsrl.vi v24, v16, 2
+; RV64-NEXT: vor.vv v16, v16, v24
+; RV64-NEXT: vsrl.vi v24, v16, 4
+; RV64-NEXT: vor.vv v16, v16, v24
+; RV64-NEXT: vsrl.vi v24, v16, 8
+; RV64-NEXT: vor.vv v16, v16, v24
+; RV64-NEXT: vsrl.vi v24, v16, 16
+; RV64-NEXT: vor.vv v16, v16, v24
+; RV64-NEXT: vsrl.vx v24, v16, a1
+; RV64-NEXT: vor.vv v16, v16, v24
+; RV64-NEXT: vnot.v v16, v16
+; RV64-NEXT: vsrl.vi v24, v16, 1
+; RV64-NEXT: vand.vx v24, v24, a2
+; RV64-NEXT: vsub.vv v16, v16, v24
+; RV64-NEXT: vand.vx v24, v16, a3
+; RV64-NEXT: vsrl.vi v16, v16, 2
+; RV64-NEXT: vand.vx v16, v16, a3
+; RV64-NEXT: vadd.vv v16, v24, v16
+; RV64-NEXT: vsrl.vi v24, v16, 4
+; RV64-NEXT: vadd.vv v16, v16, v24
+; RV64-NEXT: vand.vx v16, v16, a4
+; RV64-NEXT: vmul.vx v16, v16, a5
+; RV64-NEXT: vsrl.vx v16, v16, a6
; RV64-NEXT: ret
- %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl)
- ret <32 x i64> %v
-}
-
-define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_v32i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -48
-; RV32-NEXT: .cfi_def_cfa_offset 48
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
-; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb
-; RV32-NEXT: vmv8r.v v24, v16
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
-; RV32-NEXT: sw a1, 40(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
-; RV32-NEXT: sw a1, 32(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: li a2, 16
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: mv a1, a0
-; RV32-NEXT: bltu a0, a2, .LBB35_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a1, 16
-; RV32-NEXT: .LBB35_2:
-; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v16, v8, a2
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: addi a3, sp, 40
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v0, (a3), zero
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: li a4, 24
-; RV32-NEXT: mul a3, a3, a4
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: addi a3, sp, 32
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v0, (a3), zero
-; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v8, v0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: addi a3, sp, 24
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a3), zero
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 4
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: addi a3, sp, 16
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a3), zero
-; RV32-NEXT: addi a3, sp, 48
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: li a1, 56
-; RV32-NEXT: vsrl.vx v8, v8, a1
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 3
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: addi a3, a0, -16
-; RV32-NEXT: sltu a0, a0, a3
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a0, a0, a3
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v8, v24, 1
-; RV32-NEXT: vor.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v24, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vi v24, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vi v24, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vi v24, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vx v24, v8, a2
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v24, v8, 1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a2, 24
-; RV32-NEXT: mul a0, a0, a2
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v24, v16
-; RV32-NEXT: vsub.vv v8, v8, v24
-; RV32-NEXT: vand.vv v24, v8, v0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v0
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v24, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v24
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: addi a0, sp, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: vsrl.vx v16, v8, a1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 48
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_v32i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a2, 16
-; RV64-NEXT: mv a1, a0
-; RV64-NEXT: bltu a0, a2, .LBB35_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: li a1, 16
-; RV64-NEXT: .LBB35_2:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v24, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: li a1, 32
-; RV64-NEXT: vsrl.vx v24, v8, a1
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v24, v8, 1
-; RV64-NEXT: lui a2, 349525
-; RV64-NEXT: addiw a2, a2, 1365
-; RV64-NEXT: slli a3, a2, 32
-; RV64-NEXT: add a2, a2, a3
-; RV64-NEXT: vand.vx v24, v24, a2
-; RV64-NEXT: vsub.vv v8, v8, v24
-; RV64-NEXT: lui a3, 209715
-; RV64-NEXT: addiw a3, a3, 819
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
-; RV64-NEXT: vand.vx v24, v8, a3
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a3
-; RV64-NEXT: vadd.vv v8, v24, v8
-; RV64-NEXT: vsrl.vi v24, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v24
-; RV64-NEXT: lui a4, 61681
-; RV64-NEXT: addiw a4, a4, -241
-; RV64-NEXT: slli a5, a4, 32
-; RV64-NEXT: add a4, a4, a5
-; RV64-NEXT: vand.vx v8, v8, a4
-; RV64-NEXT: lui a5, 4112
-; RV64-NEXT: addiw a5, a5, 257
-; RV64-NEXT: slli a6, a5, 32
-; RV64-NEXT: add a5, a5, a6
-; RV64-NEXT: vmul.vx v8, v8, a5
-; RV64-NEXT: li a6, 56
-; RV64-NEXT: vsrl.vx v8, v8, a6
-; RV64-NEXT: addi a7, a0, -16
-; RV64-NEXT: sltu a0, a0, a7
-; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: and a0, a0, a7
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v24, v16, 1
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 2
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 4
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 8
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 16
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsrl.vx v24, v16, a1
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vnot.v v16, v16
-; RV64-NEXT: vsrl.vi v24, v16, 1
-; RV64-NEXT: vand.vx v24, v24, a2
-; RV64-NEXT: vsub.vv v16, v16, v24
-; RV64-NEXT: vand.vx v24, v16, a3
-; RV64-NEXT: vsrl.vi v16, v16, 2
-; RV64-NEXT: vand.vx v16, v16, a3
-; RV64-NEXT: vadd.vv v16, v24, v16
-; RV64-NEXT: vsrl.vi v24, v16, 4
-; RV64-NEXT: vadd.vv v16, v16, v24
-; RV64-NEXT: vand.vx v16, v16, a4
-; RV64-NEXT: vmul.vx v16, v16, a5
-; RV64-NEXT: vsrl.vx v16, v16, a6
-; RV64-NEXT: ret
- %head = insertelement <32 x i1> poison, i1 true, i32 0
- %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
+ %head = insertelement <32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
%v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl)
ret <32 x i64> %v
}
@@ -3414,141 +2862,75 @@ define <16 x i8> @vp_ctlz_zero_undef_v16i8_unmasked(<16 x i8> %va, i32 zeroext %
}
define <2 x i16> @vp_ctlz_zero_undef_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 true, <2 x i1> %m, i32 %evl)
ret <2 x i16> %v
}
define <2 x i16> @vp_ctlz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v2i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v2i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 true, <2 x i1> %m, i32 %evl)
@@ -3556,141 +2938,75 @@ define <2 x i16> @vp_ctlz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext %
}
define <4 x i16> @vp_ctlz_zero_undef_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 true, <4 x i1> %m, i32 %evl)
ret <4 x i16> %v
}
define <4 x i16> @vp_ctlz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v4i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v4i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 true, <4 x i1> %m, i32 %evl)
@@ -3698,141 +3014,75 @@ define <4 x i16> @vp_ctlz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext %
}
define <8 x i16> @vp_ctlz_zero_undef_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v8i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 true, <8 x i1> %m, i32 %evl)
ret <8 x i16> %v
}
define <8 x i16> @vp_ctlz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v8i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v8i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 true, <8 x i1> %m, i32 %evl)
@@ -3840,141 +3090,75 @@ define <8 x i16> @vp_ctlz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %
}
define <16 x i16> @vp_ctlz_zero_undef_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v16i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v16i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 true, <16 x i1> %m, i32 %evl)
ret <16 x i16> %v
}
define <16 x i16> @vp_ctlz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v16i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v16i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 true, <16 x i1> %m, i32 %evl)
@@ -3982,153 +3166,81 @@ define <16 x i16> @vp_ctlz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroex
}
define <2 x i32> @vp_ctlz_zero_undef_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 16, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
%v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl)
ret <2 x i32> %v
}
define <2 x i32> @vp_ctlz_zero_undef_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v2i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v2i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 16
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl)
@@ -4136,153 +3248,81 @@ define <2 x i32> @vp_ctlz_zero_undef_v2i32_unmasked(<2 x i32> %va, i32 zeroext %
}
define <4 x i32> @vp_ctlz_zero_undef_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 16, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
%v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 true, <4 x i1> %m, i32 %evl)
ret <4 x i32> %v
-}
-
-define <4 x i32> @vp_ctlz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v4i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v4i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+}
+
+define <4 x i32> @vp_ctlz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_zero_undef_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 16
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 true, <4 x i1> %m, i32 %evl)
@@ -4290,153 +3330,81 @@ define <4 x i32> @vp_ctlz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %
}
define <8 x i32> @vp_ctlz_zero_undef_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 16, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
%v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 true, <8 x i1> %m, i32 %evl)
ret <8 x i32> %v
}
define <8 x i32> @vp_ctlz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v8i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v8i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 16
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 true, <8 x i1> %m, i32 %evl)
@@ -4444,153 +3412,81 @@ define <8 x i32> @vp_ctlz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext %
}
define <16 x i32> @vp_ctlz_zero_undef_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 16, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
%v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 true, <16 x i1> %m, i32 %evl)
ret <16 x i32> %v
}
define <16 x i32> @vp_ctlz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_v16i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v16i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vsrl.vi v12, v8, 2
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vsrl.vi v12, v8, 4
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vsrl.vi v12, v8, 8
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vsrl.vi v12, v8, 16
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vsub.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 true, <16 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
index 9ad8beac56c4b75..9c30d3ac71679ba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -123,11 +123,11 @@ define void @ctlz_v8i16(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1
; LMULMAX2-RV64I-NEXT: lui a1, 5
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365
; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1
; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: lui a1, 3
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 819
; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
@@ -135,7 +135,7 @@ define void @ctlz_v8i16(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4
; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: lui a1, 1
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT: addi a1, a1, -241
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: li a1, 257
; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
@@ -143,75 +143,40 @@ define void @ctlz_v8i16(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vse16.v v8, (a0)
; LMULMAX2-RV64I-NEXT: ret
;
-; LMULMAX1-RV32-LABEL: ctlz_v8i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 8
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vnot.v v8, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT: lui a1, 5
-; LMULMAX1-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 3
-; LMULMAX1-RV32-NEXT: addi a1, a1, 819
-; LMULMAX1-RV32-NEXT: vand.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 1
-; LMULMAX1-RV32-NEXT: addi a1, a1, -241
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: li a1, 257
-; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: ctlz_v8i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 2
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 8
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vnot.v v8, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT: lui a1, 5
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: lui a1, 3
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: lui a1, 1
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: li a1, 257
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX1-LABEL: ctlz_v8i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX1-NEXT: vle16.v v8, (a0)
+; LMULMAX1-NEXT: vsrl.vi v9, v8, 1
+; LMULMAX1-NEXT: vor.vv v8, v8, v9
+; LMULMAX1-NEXT: vsrl.vi v9, v8, 2
+; LMULMAX1-NEXT: vor.vv v8, v8, v9
+; LMULMAX1-NEXT: vsrl.vi v9, v8, 4
+; LMULMAX1-NEXT: vor.vv v8, v8, v9
+; LMULMAX1-NEXT: vsrl.vi v9, v8, 8
+; LMULMAX1-NEXT: vor.vv v8, v8, v9
+; LMULMAX1-NEXT: vnot.v v8, v8
+; LMULMAX1-NEXT: vsrl.vi v9, v8, 1
+; LMULMAX1-NEXT: lui a1, 5
+; LMULMAX1-NEXT: addi a1, a1, 1365
+; LMULMAX1-NEXT: vand.vx v9, v9, a1
+; LMULMAX1-NEXT: vsub.vv v8, v8, v9
+; LMULMAX1-NEXT: lui a1, 3
+; LMULMAX1-NEXT: addi a1, a1, 819
+; LMULMAX1-NEXT: vand.vx v9, v8, a1
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT: vand.vx v8, v8, a1
+; LMULMAX1-NEXT: vadd.vv v8, v9, v8
+; LMULMAX1-NEXT: vsrl.vi v9, v8, 4
+; LMULMAX1-NEXT: vadd.vv v8, v8, v9
+; LMULMAX1-NEXT: lui a1, 1
+; LMULMAX1-NEXT: addi a1, a1, -241
+; LMULMAX1-NEXT: vand.vx v8, v8, a1
+; LMULMAX1-NEXT: li a1, 257
+; LMULMAX1-NEXT: vmul.vx v8, v8, a1
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 8
+; LMULMAX1-NEXT: vse16.v v8, (a0)
+; LMULMAX1-NEXT: ret
;
; LMULMAX2-RV32F-LABEL: ctlz_v8i16:
; LMULMAX2-RV32F: # %bb.0:
@@ -349,11 +314,11 @@ define void @ctlz_v4i32(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1
; LMULMAX2-RV64I-NEXT: lui a1, 349525
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365
; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1
; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: lui a1, 209715
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 819
; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
@@ -361,10 +326,10 @@ define void @ctlz_v4i32(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4
; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: lui a1, 61681
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT: addi a1, a1, -241
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: lui a1, 4112
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 257
; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24
; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0)
@@ -769,193 +734,99 @@ define void @ctlz_v32i8(ptr %x, ptr %y) nounwind {
declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)
define void @ctlz_v16i16(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32-LABEL: ctlz_v16i16:
-; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 2
-; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 8
-; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vnot.v v8, v8
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT: lui a1, 5
-; LMULMAX2-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 3
-; LMULMAX2-RV32-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32-NEXT: vand.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 1
-; LMULMAX2-RV32-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: li a1, 257
-; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX2-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT: ret
-;
-; LMULMAX2-RV64-LABEL: ctlz_v16i16:
-; LMULMAX2-RV64: # %bb.0:
-; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 2
-; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 8
-; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vnot.v v8, v8
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT: lui a1, 5
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, 3
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, 1
-; LMULMAX2-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: li a1, 257
-; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX2-RV64-NEXT: vse16.v v8, (a0)
-; LMULMAX2-RV64-NEXT: ret
-;
-; LMULMAX1-RV32-LABEL: ctlz_v16i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a1)
-; LMULMAX1-RV32-NEXT: vle16.v v9, (a0)
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 2
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 8
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vnot.v v8, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT: lui a2, 5
-; LMULMAX1-RV32-NEXT: addi a2, a2, 1365
-; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: lui a3, 3
-; LMULMAX1-RV32-NEXT: addi a3, a3, 819
-; LMULMAX1-RV32-NEXT: vand.vx v10, v8, a3
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a3
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: lui a4, 1
-; LMULMAX1-RV32-NEXT: addi a4, a4, -241
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4
-; LMULMAX1-RV32-NEXT: li a5, 257
-; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a5
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 2
-; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 8
-; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vnot.v v9, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vand.vx v10, v9, a3
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a3
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a5
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 8
-; LMULMAX1-RV32-NEXT: vse16.v v9, (a0)
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a1)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: ctlz_v16i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a1)
-; LMULMAX1-RV64-NEXT: vle16.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 8
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vnot.v v8, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT: lui a2, 5
-; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: lui a3, 3
-; LMULMAX1-RV64-NEXT: addiw a3, a3, 819
-; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a3
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: lui a4, 1
-; LMULMAX1-RV64-NEXT: addiw a4, a4, -241
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT: li a5, 257
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2
-; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 8
-; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vnot.v v9, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a3
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 8
-; LMULMAX1-RV64-NEXT: vse16.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vse16.v v8, (a1)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX2-LABEL: ctlz_v16i16:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; LMULMAX2-NEXT: vle16.v v8, (a0)
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-NEXT: vor.vv v8, v8, v10
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 2
+; LMULMAX2-NEXT: vor.vv v8, v8, v10
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-NEXT: vor.vv v8, v8, v10
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 8
+; LMULMAX2-NEXT: vor.vv v8, v8, v10
+; LMULMAX2-NEXT: vnot.v v8, v8
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-NEXT: lui a1, 5
+; LMULMAX2-NEXT: addi a1, a1, 1365
+; LMULMAX2-NEXT: vand.vx v10, v10, a1
+; LMULMAX2-NEXT: vsub.vv v8, v8, v10
+; LMULMAX2-NEXT: lui a1, 3
+; LMULMAX2-NEXT: addi a1, a1, 819
+; LMULMAX2-NEXT: vand.vx v10, v8, a1
+; LMULMAX2-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-NEXT: lui a1, 1
+; LMULMAX2-NEXT: addi a1, a1, -241
+; LMULMAX2-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-NEXT: li a1, 257
+; LMULMAX2-NEXT: vmul.vx v8, v8, a1
+; LMULMAX2-NEXT: vsrl.vi v8, v8, 8
+; LMULMAX2-NEXT: vse16.v v8, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-LABEL: ctlz_v16i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: vle16.v v8, (a1)
+; LMULMAX1-NEXT: vle16.v v9, (a0)
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX1-NEXT: vor.vv v8, v8, v10
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 2
+; LMULMAX1-NEXT: vor.vv v8, v8, v10
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX1-NEXT: vor.vv v8, v8, v10
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 8
+; LMULMAX1-NEXT: vor.vv v8, v8, v10
+; LMULMAX1-NEXT: vnot.v v8, v8
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX1-NEXT: lui a2, 5
+; LMULMAX1-NEXT: addi a2, a2, 1365
+; LMULMAX1-NEXT: vand.vx v10, v10, a2
+; LMULMAX1-NEXT: vsub.vv v8, v8, v10
+; LMULMAX1-NEXT: lui a3, 3
+; LMULMAX1-NEXT: addi a3, a3, 819
+; LMULMAX1-NEXT: vand.vx v10, v8, a3
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT: vand.vx v8, v8, a3
+; LMULMAX1-NEXT: vadd.vv v8, v10, v8
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX1-NEXT: vadd.vv v8, v8, v10
+; LMULMAX1-NEXT: lui a4, 1
+; LMULMAX1-NEXT: addi a4, a4, -241
+; LMULMAX1-NEXT: vand.vx v8, v8, a4
+; LMULMAX1-NEXT: li a5, 257
+; LMULMAX1-NEXT: vmul.vx v8, v8, a5
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 8
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 1
+; LMULMAX1-NEXT: vor.vv v9, v9, v10
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 2
+; LMULMAX1-NEXT: vor.vv v9, v9, v10
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 4
+; LMULMAX1-NEXT: vor.vv v9, v9, v10
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 8
+; LMULMAX1-NEXT: vor.vv v9, v9, v10
+; LMULMAX1-NEXT: vnot.v v9, v9
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 1
+; LMULMAX1-NEXT: vand.vx v10, v10, a2
+; LMULMAX1-NEXT: vsub.vv v9, v9, v10
+; LMULMAX1-NEXT: vand.vx v10, v9, a3
+; LMULMAX1-NEXT: vsrl.vi v9, v9, 2
+; LMULMAX1-NEXT: vand.vx v9, v9, a3
+; LMULMAX1-NEXT: vadd.vv v9, v10, v9
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 4
+; LMULMAX1-NEXT: vadd.vv v9, v9, v10
+; LMULMAX1-NEXT: vand.vx v9, v9, a4
+; LMULMAX1-NEXT: vmul.vx v9, v9, a5
+; LMULMAX1-NEXT: vsrl.vi v9, v9, 8
+; LMULMAX1-NEXT: vse16.v v9, (a0)
+; LMULMAX1-NEXT: vse16.v v8, (a1)
+; LMULMAX1-NEXT: ret
;
; LMULMAX8-LABEL: ctlz_v16i16:
; LMULMAX8: # %bb.0:
@@ -1041,11 +912,11 @@ define void @ctlz_v8i32(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1
; LMULMAX2-RV64I-NEXT: lui a1, 349525
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365
; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1
; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10
; LMULMAX2-RV64I-NEXT: lui a1, 209715
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 819
; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
@@ -1053,10 +924,10 @@ define void @ctlz_v8i32(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4
; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10
; LMULMAX2-RV64I-NEXT: lui a1, 61681
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT: addi a1, a1, -241
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: lui a1, 4112
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 257
; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24
; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0)
@@ -1461,11 +1332,11 @@ define void @ctlz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1
; LMULMAX2-RV64I-NEXT: lui a1, 5
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365
; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1
; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: lui a1, 3
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 819
; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
@@ -1473,7 +1344,7 @@ define void @ctlz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4
; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: lui a1, 1
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT: addi a1, a1, -241
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: li a1, 257
; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
@@ -1481,75 +1352,40 @@ define void @ctlz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vse16.v v8, (a0)
; LMULMAX2-RV64I-NEXT: ret
;
-; LMULMAX1-RV32-LABEL: ctlz_zero_undef_v8i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 8
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vnot.v v8, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT: lui a1, 5
-; LMULMAX1-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 3
-; LMULMAX1-RV32-NEXT: addi a1, a1, 819
-; LMULMAX1-RV32-NEXT: vand.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 1
-; LMULMAX1-RV32-NEXT: addi a1, a1, -241
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: li a1, 257
-; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: ctlz_zero_undef_v8i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 2
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 8
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vnot.v v8, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT: lui a1, 5
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: lui a1, 3
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: lui a1, 1
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: li a1, 257
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX1-LABEL: ctlz_zero_undef_v8i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX1-NEXT: vle16.v v8, (a0)
+; LMULMAX1-NEXT: vsrl.vi v9, v8, 1
+; LMULMAX1-NEXT: vor.vv v8, v8, v9
+; LMULMAX1-NEXT: vsrl.vi v9, v8, 2
+; LMULMAX1-NEXT: vor.vv v8, v8, v9
+; LMULMAX1-NEXT: vsrl.vi v9, v8, 4
+; LMULMAX1-NEXT: vor.vv v8, v8, v9
+; LMULMAX1-NEXT: vsrl.vi v9, v8, 8
+; LMULMAX1-NEXT: vor.vv v8, v8, v9
+; LMULMAX1-NEXT: vnot.v v8, v8
+; LMULMAX1-NEXT: vsrl.vi v9, v8, 1
+; LMULMAX1-NEXT: lui a1, 5
+; LMULMAX1-NEXT: addi a1, a1, 1365
+; LMULMAX1-NEXT: vand.vx v9, v9, a1
+; LMULMAX1-NEXT: vsub.vv v8, v8, v9
+; LMULMAX1-NEXT: lui a1, 3
+; LMULMAX1-NEXT: addi a1, a1, 819
+; LMULMAX1-NEXT: vand.vx v9, v8, a1
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT: vand.vx v8, v8, a1
+; LMULMAX1-NEXT: vadd.vv v8, v9, v8
+; LMULMAX1-NEXT: vsrl.vi v9, v8, 4
+; LMULMAX1-NEXT: vadd.vv v8, v8, v9
+; LMULMAX1-NEXT: lui a1, 1
+; LMULMAX1-NEXT: addi a1, a1, -241
+; LMULMAX1-NEXT: vand.vx v8, v8, a1
+; LMULMAX1-NEXT: li a1, 257
+; LMULMAX1-NEXT: vmul.vx v8, v8, a1
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 8
+; LMULMAX1-NEXT: vse16.v v8, (a0)
+; LMULMAX1-NEXT: ret
;
; LMULMAX2-RV32F-LABEL: ctlz_zero_undef_v8i16:
; LMULMAX2-RV32F: # %bb.0:
@@ -1676,11 +1512,11 @@ define void @ctlz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1
; LMULMAX2-RV64I-NEXT: lui a1, 349525
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365
; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1
; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: lui a1, 209715
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 819
; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
@@ -1688,10 +1524,10 @@ define void @ctlz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4
; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: lui a1, 61681
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT: addi a1, a1, -241
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: lui a1, 4112
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 257
; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24
; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0)
@@ -2069,193 +1905,99 @@ define void @ctlz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
}
define void @ctlz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32-LABEL: ctlz_zero_undef_v16i16:
-; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 2
-; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 8
-; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vnot.v v8, v8
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT: lui a1, 5
-; LMULMAX2-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 3
-; LMULMAX2-RV32-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32-NEXT: vand.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 1
-; LMULMAX2-RV32-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: li a1, 257
-; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX2-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT: ret
-;
-; LMULMAX2-RV64-LABEL: ctlz_zero_undef_v16i16:
-; LMULMAX2-RV64: # %bb.0:
-; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 2
-; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 8
-; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vnot.v v8, v8
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT: lui a1, 5
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, 3
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, 1
-; LMULMAX2-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: li a1, 257
-; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX2-RV64-NEXT: vse16.v v8, (a0)
-; LMULMAX2-RV64-NEXT: ret
-;
-; LMULMAX1-RV32-LABEL: ctlz_zero_undef_v16i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a1)
-; LMULMAX1-RV32-NEXT: vle16.v v9, (a0)
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 2
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 8
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vnot.v v8, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT: lui a2, 5
-; LMULMAX1-RV32-NEXT: addi a2, a2, 1365
-; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: lui a3, 3
-; LMULMAX1-RV32-NEXT: addi a3, a3, 819
-; LMULMAX1-RV32-NEXT: vand.vx v10, v8, a3
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a3
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: lui a4, 1
-; LMULMAX1-RV32-NEXT: addi a4, a4, -241
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4
-; LMULMAX1-RV32-NEXT: li a5, 257
-; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a5
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 2
-; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 8
-; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vnot.v v9, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vand.vx v10, v9, a3
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a3
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a5
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 8
-; LMULMAX1-RV32-NEXT: vse16.v v9, (a0)
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a1)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: ctlz_zero_undef_v16i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a1)
-; LMULMAX1-RV64-NEXT: vle16.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 8
-; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vnot.v v8, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT: lui a2, 5
-; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: lui a3, 3
-; LMULMAX1-RV64-NEXT: addiw a3, a3, 819
-; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a3
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: lui a4, 1
-; LMULMAX1-RV64-NEXT: addiw a4, a4, -241
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT: li a5, 257
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2
-; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 8
-; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vnot.v v9, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a3
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 8
-; LMULMAX1-RV64-NEXT: vse16.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vse16.v v8, (a1)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX2-LABEL: ctlz_zero_undef_v16i16:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; LMULMAX2-NEXT: vle16.v v8, (a0)
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-NEXT: vor.vv v8, v8, v10
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 2
+; LMULMAX2-NEXT: vor.vv v8, v8, v10
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-NEXT: vor.vv v8, v8, v10
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 8
+; LMULMAX2-NEXT: vor.vv v8, v8, v10
+; LMULMAX2-NEXT: vnot.v v8, v8
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-NEXT: lui a1, 5
+; LMULMAX2-NEXT: addi a1, a1, 1365
+; LMULMAX2-NEXT: vand.vx v10, v10, a1
+; LMULMAX2-NEXT: vsub.vv v8, v8, v10
+; LMULMAX2-NEXT: lui a1, 3
+; LMULMAX2-NEXT: addi a1, a1, 819
+; LMULMAX2-NEXT: vand.vx v10, v8, a1
+; LMULMAX2-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-NEXT: lui a1, 1
+; LMULMAX2-NEXT: addi a1, a1, -241
+; LMULMAX2-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-NEXT: li a1, 257
+; LMULMAX2-NEXT: vmul.vx v8, v8, a1
+; LMULMAX2-NEXT: vsrl.vi v8, v8, 8
+; LMULMAX2-NEXT: vse16.v v8, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-LABEL: ctlz_zero_undef_v16i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: vle16.v v8, (a1)
+; LMULMAX1-NEXT: vle16.v v9, (a0)
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX1-NEXT: vor.vv v8, v8, v10
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 2
+; LMULMAX1-NEXT: vor.vv v8, v8, v10
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX1-NEXT: vor.vv v8, v8, v10
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 8
+; LMULMAX1-NEXT: vor.vv v8, v8, v10
+; LMULMAX1-NEXT: vnot.v v8, v8
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX1-NEXT: lui a2, 5
+; LMULMAX1-NEXT: addi a2, a2, 1365
+; LMULMAX1-NEXT: vand.vx v10, v10, a2
+; LMULMAX1-NEXT: vsub.vv v8, v8, v10
+; LMULMAX1-NEXT: lui a3, 3
+; LMULMAX1-NEXT: addi a3, a3, 819
+; LMULMAX1-NEXT: vand.vx v10, v8, a3
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT: vand.vx v8, v8, a3
+; LMULMAX1-NEXT: vadd.vv v8, v10, v8
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX1-NEXT: vadd.vv v8, v8, v10
+; LMULMAX1-NEXT: lui a4, 1
+; LMULMAX1-NEXT: addi a4, a4, -241
+; LMULMAX1-NEXT: vand.vx v8, v8, a4
+; LMULMAX1-NEXT: li a5, 257
+; LMULMAX1-NEXT: vmul.vx v8, v8, a5
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 8
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 1
+; LMULMAX1-NEXT: vor.vv v9, v9, v10
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 2
+; LMULMAX1-NEXT: vor.vv v9, v9, v10
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 4
+; LMULMAX1-NEXT: vor.vv v9, v9, v10
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 8
+; LMULMAX1-NEXT: vor.vv v9, v9, v10
+; LMULMAX1-NEXT: vnot.v v9, v9
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 1
+; LMULMAX1-NEXT: vand.vx v10, v10, a2
+; LMULMAX1-NEXT: vsub.vv v9, v9, v10
+; LMULMAX1-NEXT: vand.vx v10, v9, a3
+; LMULMAX1-NEXT: vsrl.vi v9, v9, 2
+; LMULMAX1-NEXT: vand.vx v9, v9, a3
+; LMULMAX1-NEXT: vadd.vv v9, v10, v9
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 4
+; LMULMAX1-NEXT: vadd.vv v9, v9, v10
+; LMULMAX1-NEXT: vand.vx v9, v9, a4
+; LMULMAX1-NEXT: vmul.vx v9, v9, a5
+; LMULMAX1-NEXT: vsrl.vi v9, v9, 8
+; LMULMAX1-NEXT: vse16.v v9, (a0)
+; LMULMAX1-NEXT: vse16.v v8, (a1)
+; LMULMAX1-NEXT: ret
;
; LMULMAX8-LABEL: ctlz_zero_undef_v16i16:
; LMULMAX8: # %bb.0:
@@ -2338,11 +2080,11 @@ define void @ctlz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vnot.v v8, v8
; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1
; LMULMAX2-RV64I-NEXT: lui a1, 349525
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365
; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1
; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10
; LMULMAX2-RV64I-NEXT: lui a1, 209715
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 819
; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
@@ -2350,10 +2092,10 @@ define void @ctlz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4
; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10
; LMULMAX2-RV64I-NEXT: lui a1, 61681
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT: addi a1, a1, -241
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: lui a1, 4112
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 257
; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24
; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0)
@@ -2627,3 +2369,8 @@ define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
store <4 x i64> %c, ptr %x
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; LMULMAX1-RV32: {{.*}}
+; LMULMAX1-RV64: {{.*}}
+; LMULMAX2-RV32: {{.*}}
+; LMULMAX2-RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
index 55485beff8eb1d5..08f7e2058ad29ea 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
@@ -191,105 +191,57 @@ define <16 x i8> @vp_ctpop_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
declare <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16>, <2 x i1>, i32)
define <2 x i16> @vp_ctpop_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_v2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl)
ret <2 x i16> %v
}
define <2 x i16> @vp_ctpop_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v2i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_v2i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl)
@@ -299,105 +251,57 @@ define <2 x i16> @vp_ctpop_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
declare <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16>, <4 x i1>, i32)
define <4 x i16> @vp_ctpop_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_v4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
ret <4 x i16> %v
}
define <4 x i16> @vp_ctpop_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v4i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_v4i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
@@ -407,105 +311,57 @@ define <4 x i16> @vp_ctpop_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
declare <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16>, <8 x i1>, i32)
define <8 x i16> @vp_ctpop_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v8i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_v8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl)
ret <8 x i16> %v
}
define <8 x i16> @vp_ctpop_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v8i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_v8i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl)
@@ -515,105 +371,57 @@ define <8 x i16> @vp_ctpop_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
declare <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16>, <16 x i1>, i32)
define <16 x i16> @vp_ctpop_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v16i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_v16i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl)
ret <16 x i16> %v
}
define <16 x i16> @vp_ctpop_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v16i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_v16i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl)
@@ -623,109 +431,59 @@ define <16 x i16> @vp_ctpop_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
declare <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32>, <2 x i1>, i32)
define <2 x i32> @vp_ctpop_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_v2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
%v = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
ret <2 x i32> %v
}
define <2 x i32> @vp_ctpop_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v2i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_v2i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
@@ -735,109 +493,59 @@ define <2 x i32> @vp_ctpop_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
declare <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32>, <4 x i1>, i32)
define <4 x i32> @vp_ctpop_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_v4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
%v = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
ret <4 x i32> %v
}
define <4 x i32> @vp_ctpop_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v4i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_v4i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
@@ -847,109 +555,59 @@ define <4 x i32> @vp_ctpop_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
declare <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32>, <8 x i1>, i32)
define <8 x i32> @vp_ctpop_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_v8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
%v = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
ret <8 x i32> %v
}
define <8 x i32> @vp_ctpop_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v8i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_v8i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
@@ -959,109 +617,59 @@ define <8 x i32> @vp_ctpop_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
declare <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32>, <16 x i1>, i32)
define <16 x i32> @vp_ctpop_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_v16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
%v = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
ret <16 x i32> %v
}
define <16 x i32> @vp_ctpop_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctpop_v16i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctpop_v16i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctpop_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v12, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vsub.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
index c7b6db226ee5f6e..0b2a44fa14526be 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
@@ -42,109 +42,31 @@ define void @ctpop_v16i8(ptr %x, ptr %y) {
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
define void @ctpop_v8i16(ptr %x, ptr %y) {
-; LMULMAX2-RV32-LABEL: ctpop_v8i16:
-; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX2-RV32-NEXT: lui a1, 5
-; LMULMAX2-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT: vand.vx v9, v9, a1
-; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT: lui a1, 3
-; LMULMAX2-RV32-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32-NEXT: vand.vx v9, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v9, v8
-; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT: lui a1, 1
-; LMULMAX2-RV32-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: li a1, 257
-; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX2-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT: ret
-;
-; LMULMAX2-RV64-LABEL: ctpop_v8i16:
-; LMULMAX2-RV64: # %bb.0:
-; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX2-RV64-NEXT: lui a1, 5
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1
-; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT: lui a1, 3
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v9, v8
-; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT: lui a1, 1
-; LMULMAX2-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: li a1, 257
-; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX2-RV64-NEXT: vse16.v v8, (a0)
-; LMULMAX2-RV64-NEXT: ret
-;
-; LMULMAX1-RV32-LABEL: ctpop_v8i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT: lui a1, 5
-; LMULMAX1-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 3
-; LMULMAX1-RV32-NEXT: addi a1, a1, 819
-; LMULMAX1-RV32-NEXT: vand.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 1
-; LMULMAX1-RV32-NEXT: addi a1, a1, -241
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: li a1, 257
-; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: ctpop_v8i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT: lui a1, 5
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: lui a1, 3
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: lui a1, 1
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: li a1, 257
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: ctpop_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a1, 5
+; CHECK-NEXT: addi a1, a1, 1365
+; CHECK-NEXT: vand.vx v9, v9, a1
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a1, 3
+; CHECK-NEXT: addi a1, a1, 819
+; CHECK-NEXT: vand.vx v9, v8, a1
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a1, 1
+; CHECK-NEXT: addi a1, a1, -241
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: li a1, 257
+; CHECK-NEXT: vmul.vx v8, v8, a1
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
;
; ZVBB-LABEL: ctpop_v8i16:
; ZVBB: # %bb.0:
@@ -162,113 +84,32 @@ define void @ctpop_v8i16(ptr %x, ptr %y) {
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
define void @ctpop_v4i32(ptr %x, ptr %y) {
-; LMULMAX2-RV32-LABEL: ctpop_v4i32:
-; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32-NEXT: vle32.v v8, (a0)
-; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX2-RV32-NEXT: lui a1, 349525
-; LMULMAX2-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT: vand.vx v9, v9, a1
-; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT: lui a1, 209715
-; LMULMAX2-RV32-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32-NEXT: vand.vx v9, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v9, v8
-; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT: lui a1, 61681
-; LMULMAX2-RV32-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: lui a1, 4112
-; LMULMAX2-RV32-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX2-RV32-NEXT: vse32.v v8, (a0)
-; LMULMAX2-RV32-NEXT: ret
-;
-; LMULMAX2-RV64-LABEL: ctpop_v4i32:
-; LMULMAX2-RV64: # %bb.0:
-; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV64-NEXT: vle32.v v8, (a0)
-; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX2-RV64-NEXT: lui a1, 349525
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1
-; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT: lui a1, 209715
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v9, v8
-; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT: lui a1, 61681
-; LMULMAX2-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: lui a1, 4112
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 257
-; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX2-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX2-RV64-NEXT: ret
-;
-; LMULMAX1-RV32-LABEL: ctpop_v4i32:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT: lui a1, 349525
-; LMULMAX1-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 209715
-; LMULMAX1-RV32-NEXT: addi a1, a1, 819
-; LMULMAX1-RV32-NEXT: vand.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 61681
-; LMULMAX1-RV32-NEXT: addi a1, a1, -241
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: lui a1, 4112
-; LMULMAX1-RV32-NEXT: addi a1, a1, 257
-; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: ctpop_v4i32:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT: lui a1, 349525
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: lui a1, 209715
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: lui a1, 61681
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: lui a1, 4112
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 257
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: ctpop_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a1, 349525
+; CHECK-NEXT: addi a1, a1, 1365
+; CHECK-NEXT: vand.vx v9, v9, a1
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a1, 209715
+; CHECK-NEXT: addi a1, a1, 819
+; CHECK-NEXT: vand.vx v9, v8, a1
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a1, 61681
+; CHECK-NEXT: addi a1, a1, -241
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: lui a1, 4112
+; CHECK-NEXT: addi a1, a1, 257
+; CHECK-NEXT: vmul.vx v8, v8, a1
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
;
; ZVBB-LABEL: ctpop_v4i32:
; ZVBB: # %bb.0:
@@ -523,139 +364,72 @@ define void @ctpop_v32i8(ptr %x, ptr %y) {
declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
define void @ctpop_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-RV32-LABEL: ctpop_v16i16:
-; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT: lui a1, 5
-; LMULMAX2-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 3
-; LMULMAX2-RV32-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32-NEXT: vand.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 1
-; LMULMAX2-RV32-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: li a1, 257
-; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX2-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT: ret
-;
-; LMULMAX2-RV64-LABEL: ctpop_v16i16:
-; LMULMAX2-RV64: # %bb.0:
-; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT: lui a1, 5
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, 3
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, 1
-; LMULMAX2-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: li a1, 257
-; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX2-RV64-NEXT: vse16.v v8, (a0)
-; LMULMAX2-RV64-NEXT: ret
-;
-; LMULMAX1-RV32-LABEL: ctpop_v16i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a1)
-; LMULMAX1-RV32-NEXT: vle16.v v9, (a0)
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT: lui a2, 5
-; LMULMAX1-RV32-NEXT: addi a2, a2, 1365
-; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: lui a3, 3
-; LMULMAX1-RV32-NEXT: addi a3, a3, 819
-; LMULMAX1-RV32-NEXT: vand.vx v10, v8, a3
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a3
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: lui a4, 1
-; LMULMAX1-RV32-NEXT: addi a4, a4, -241
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4
-; LMULMAX1-RV32-NEXT: li a5, 257
-; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a5
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vand.vx v10, v9, a3
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a3
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a5
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 8
-; LMULMAX1-RV32-NEXT: vse16.v v9, (a0)
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a1)
-; LMULMAX1-RV32-NEXT: ret
+; LMULMAX2-LABEL: ctpop_v16i16:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; LMULMAX2-NEXT: vle16.v v8, (a0)
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-NEXT: lui a1, 5
+; LMULMAX2-NEXT: addi a1, a1, 1365
+; LMULMAX2-NEXT: vand.vx v10, v10, a1
+; LMULMAX2-NEXT: vsub.vv v8, v8, v10
+; LMULMAX2-NEXT: lui a1, 3
+; LMULMAX2-NEXT: addi a1, a1, 819
+; LMULMAX2-NEXT: vand.vx v10, v8, a1
+; LMULMAX2-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-NEXT: lui a1, 1
+; LMULMAX2-NEXT: addi a1, a1, -241
+; LMULMAX2-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-NEXT: li a1, 257
+; LMULMAX2-NEXT: vmul.vx v8, v8, a1
+; LMULMAX2-NEXT: vsrl.vi v8, v8, 8
+; LMULMAX2-NEXT: vse16.v v8, (a0)
+; LMULMAX2-NEXT: ret
;
-; LMULMAX1-RV64-LABEL: ctpop_v16i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a1)
-; LMULMAX1-RV64-NEXT: vle16.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT: lui a2, 5
-; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: lui a3, 3
-; LMULMAX1-RV64-NEXT: addiw a3, a3, 819
-; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a3
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: lui a4, 1
-; LMULMAX1-RV64-NEXT: addiw a4, a4, -241
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT: li a5, 257
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a3
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 8
-; LMULMAX1-RV64-NEXT: vse16.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vse16.v v8, (a1)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX1-LABEL: ctpop_v16i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: vle16.v v8, (a1)
+; LMULMAX1-NEXT: vle16.v v9, (a0)
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX1-NEXT: lui a2, 5
+; LMULMAX1-NEXT: addi a2, a2, 1365
+; LMULMAX1-NEXT: vand.vx v10, v10, a2
+; LMULMAX1-NEXT: vsub.vv v8, v8, v10
+; LMULMAX1-NEXT: lui a3, 3
+; LMULMAX1-NEXT: addi a3, a3, 819
+; LMULMAX1-NEXT: vand.vx v10, v8, a3
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT: vand.vx v8, v8, a3
+; LMULMAX1-NEXT: vadd.vv v8, v10, v8
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX1-NEXT: vadd.vv v8, v8, v10
+; LMULMAX1-NEXT: lui a4, 1
+; LMULMAX1-NEXT: addi a4, a4, -241
+; LMULMAX1-NEXT: vand.vx v8, v8, a4
+; LMULMAX1-NEXT: li a5, 257
+; LMULMAX1-NEXT: vmul.vx v8, v8, a5
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 8
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 1
+; LMULMAX1-NEXT: vand.vx v10, v10, a2
+; LMULMAX1-NEXT: vsub.vv v9, v9, v10
+; LMULMAX1-NEXT: vand.vx v10, v9, a3
+; LMULMAX1-NEXT: vsrl.vi v9, v9, 2
+; LMULMAX1-NEXT: vand.vx v9, v9, a3
+; LMULMAX1-NEXT: vadd.vv v9, v10, v9
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 4
+; LMULMAX1-NEXT: vadd.vv v9, v9, v10
+; LMULMAX1-NEXT: vand.vx v9, v9, a4
+; LMULMAX1-NEXT: vmul.vx v9, v9, a5
+; LMULMAX1-NEXT: vsrl.vi v9, v9, 8
+; LMULMAX1-NEXT: vse16.v v9, (a0)
+; LMULMAX1-NEXT: vse16.v v8, (a1)
+; LMULMAX1-NEXT: ret
;
; ZVBB-LABEL: ctpop_v16i16:
; ZVBB: # %bb.0:
@@ -673,143 +447,74 @@ define void @ctpop_v16i16(ptr %x, ptr %y) {
declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
define void @ctpop_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-RV32-LABEL: ctpop_v8i32:
-; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vle32.v v8, (a0)
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT: lui a1, 349525
-; LMULMAX2-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 209715
-; LMULMAX2-RV32-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32-NEXT: vand.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 61681
-; LMULMAX2-RV32-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: lui a1, 4112
-; LMULMAX2-RV32-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX2-RV32-NEXT: vse32.v v8, (a0)
-; LMULMAX2-RV32-NEXT: ret
-;
-; LMULMAX2-RV64-LABEL: ctpop_v8i32:
-; LMULMAX2-RV64: # %bb.0:
-; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vle32.v v8, (a0)
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT: lui a1, 349525
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, 209715
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, 61681
-; LMULMAX2-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: lui a1, 4112
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 257
-; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX2-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX2-RV64-NEXT: ret
-;
-; LMULMAX1-RV32-LABEL: ctpop_v8i32:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle32.v v8, (a1)
-; LMULMAX1-RV32-NEXT: vle32.v v9, (a0)
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT: lui a2, 349525
-; LMULMAX1-RV32-NEXT: addi a2, a2, 1365
-; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: lui a3, 209715
-; LMULMAX1-RV32-NEXT: addi a3, a3, 819
-; LMULMAX1-RV32-NEXT: vand.vx v10, v8, a3
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a3
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: lui a4, 61681
-; LMULMAX1-RV32-NEXT: addi a4, a4, -241
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4
-; LMULMAX1-RV32-NEXT: lui a5, 4112
-; LMULMAX1-RV32-NEXT: addi a5, a5, 257
-; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a5
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vand.vx v10, v9, a3
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a3
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a5
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 24
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a0)
-; LMULMAX1-RV32-NEXT: vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT: ret
+; LMULMAX2-LABEL: ctpop_v8i32:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-NEXT: vle32.v v8, (a0)
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-NEXT: lui a1, 349525
+; LMULMAX2-NEXT: addi a1, a1, 1365
+; LMULMAX2-NEXT: vand.vx v10, v10, a1
+; LMULMAX2-NEXT: vsub.vv v8, v8, v10
+; LMULMAX2-NEXT: lui a1, 209715
+; LMULMAX2-NEXT: addi a1, a1, 819
+; LMULMAX2-NEXT: vand.vx v10, v8, a1
+; LMULMAX2-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-NEXT: lui a1, 61681
+; LMULMAX2-NEXT: addi a1, a1, -241
+; LMULMAX2-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-NEXT: lui a1, 4112
+; LMULMAX2-NEXT: addi a1, a1, 257
+; LMULMAX2-NEXT: vmul.vx v8, v8, a1
+; LMULMAX2-NEXT: vsrl.vi v8, v8, 24
+; LMULMAX2-NEXT: vse32.v v8, (a0)
+; LMULMAX2-NEXT: ret
;
-; LMULMAX1-RV64-LABEL: ctpop_v8i32:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle32.v v8, (a1)
-; LMULMAX1-RV64-NEXT: vle32.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT: lui a2, 349525
-; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: lui a3, 209715
-; LMULMAX1-RV64-NEXT: addiw a3, a3, 819
-; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a3
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: lui a4, 61681
-; LMULMAX1-RV64-NEXT: addiw a4, a4, -241
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT: lui a5, 4112
-; LMULMAX1-RV64-NEXT: addiw a5, a5, 257
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 24
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a3
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 24
-; LMULMAX1-RV64-NEXT: vse32.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a1)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX1-LABEL: ctpop_v8i32:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: vle32.v v8, (a1)
+; LMULMAX1-NEXT: vle32.v v9, (a0)
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX1-NEXT: lui a2, 349525
+; LMULMAX1-NEXT: addi a2, a2, 1365
+; LMULMAX1-NEXT: vand.vx v10, v10, a2
+; LMULMAX1-NEXT: vsub.vv v8, v8, v10
+; LMULMAX1-NEXT: lui a3, 209715
+; LMULMAX1-NEXT: addi a3, a3, 819
+; LMULMAX1-NEXT: vand.vx v10, v8, a3
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT: vand.vx v8, v8, a3
+; LMULMAX1-NEXT: vadd.vv v8, v10, v8
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX1-NEXT: vadd.vv v8, v8, v10
+; LMULMAX1-NEXT: lui a4, 61681
+; LMULMAX1-NEXT: addi a4, a4, -241
+; LMULMAX1-NEXT: vand.vx v8, v8, a4
+; LMULMAX1-NEXT: lui a5, 4112
+; LMULMAX1-NEXT: addi a5, a5, 257
+; LMULMAX1-NEXT: vmul.vx v8, v8, a5
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 24
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 1
+; LMULMAX1-NEXT: vand.vx v10, v10, a2
+; LMULMAX1-NEXT: vsub.vv v9, v9, v10
+; LMULMAX1-NEXT: vand.vx v10, v9, a3
+; LMULMAX1-NEXT: vsrl.vi v9, v9, 2
+; LMULMAX1-NEXT: vand.vx v9, v9, a3
+; LMULMAX1-NEXT: vadd.vv v9, v10, v9
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 4
+; LMULMAX1-NEXT: vadd.vv v9, v9, v10
+; LMULMAX1-NEXT: vand.vx v9, v9, a4
+; LMULMAX1-NEXT: vmul.vx v9, v9, a5
+; LMULMAX1-NEXT: vsrl.vi v9, v9, 24
+; LMULMAX1-NEXT: vse32.v v9, (a0)
+; LMULMAX1-NEXT: vse32.v v8, (a1)
+; LMULMAX1-NEXT: ret
;
; ZVBB-LABEL: ctpop_v8i32:
; ZVBB: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
index 28df7f083c4a03e..345e4180bba31a6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
@@ -223,121 +223,65 @@ define <16 x i8> @vp_cttz_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
declare <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)
define <2 x i16> @vp_cttz_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_v2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 false, <2 x i1> %m, i32 %evl)
ret <2 x i16> %v
}
define <2 x i16> @vp_cttz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v2i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_v2i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 false, <2 x i1> %m, i32 %evl)
@@ -347,121 +291,65 @@ define <2 x i16> @vp_cttz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
declare <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)
define <4 x i16> @vp_cttz_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_v4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 false, <4 x i1> %m, i32 %evl)
ret <4 x i16> %v
}
define <4 x i16> @vp_cttz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v4i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_v4i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 false, <4 x i1> %m, i32 %evl)
@@ -471,121 +359,65 @@ define <4 x i16> @vp_cttz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
declare <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)
define <8 x i16> @vp_cttz_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v8i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_v8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 false, <8 x i1> %m, i32 %evl)
ret <8 x i16> %v
}
define <8 x i16> @vp_cttz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v8i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_v8i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 false, <8 x i1> %m, i32 %evl)
@@ -595,121 +427,65 @@ define <8 x i16> @vp_cttz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
declare <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)
define <16 x i16> @vp_cttz_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v16i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_v16i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 false, <16 x i1> %m, i32 %evl)
ret <16 x i16> %v
}
define <16 x i16> @vp_cttz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v16i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_v16i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsub.vx v10, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 false, <16 x i1> %m, i32 %evl)
@@ -719,125 +495,67 @@ define <16 x i16> @vp_cttz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
declare <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)
define <2 x i32> @vp_cttz_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_v2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
%v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 false, <2 x i1> %m, i32 %evl)
ret <2 x i32> %v
}
define <2 x i32> @vp_cttz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v2i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_v2i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 false, <2 x i1> %m, i32 %evl)
@@ -847,125 +565,67 @@ define <2 x i32> @vp_cttz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
declare <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)
define <4 x i32> @vp_cttz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_v4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
- %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl)
+; CHECK-LABEL: vp_cttz_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl)
ret <4 x i32> %v
}
define <4 x i32> @vp_cttz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v4i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_v4i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl)
@@ -975,125 +635,67 @@ define <4 x i32> @vp_cttz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
declare <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)
define <8 x i32> @vp_cttz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_v8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
%v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl)
ret <8 x i32> %v
}
define <8 x i32> @vp_cttz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v8i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_v8i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsub.vx v10, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl)
@@ -1103,125 +705,67 @@ define <8 x i32> @vp_cttz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
declare <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)
define <16 x i32> @vp_cttz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsub.vx v12, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_v16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsub.vx v12, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
%v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl)
ret <16 x i32> %v
}
define <16 x i32> @vp_cttz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_v16i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsub.vx v12, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_v16i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsub.vx v12, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsub.vx v12, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v12
+; CHECK-NEXT: vsrl.vi v12, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vsub.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl)
@@ -2894,121 +2438,65 @@ define <16 x i8> @vp_cttz_zero_undef_v16i8_unmasked(<16 x i8> %va, i32 zeroext %
}
define <2 x i16> @vp_cttz_zero_undef_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 true, <2 x i1> %m, i32 %evl)
ret <2 x i16> %v
}
define <2 x i16> @vp_cttz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v2i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v2i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 true, <2 x i1> %m, i32 %evl)
@@ -3016,121 +2504,65 @@ define <2 x i16> @vp_cttz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext %
}
define <4 x i16> @vp_cttz_zero_undef_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 true, <4 x i1> %m, i32 %evl)
ret <4 x i16> %v
}
define <4 x i16> @vp_cttz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v4i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v4i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 true, <4 x i1> %m, i32 %evl)
@@ -3138,121 +2570,65 @@ define <4 x i16> @vp_cttz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext %
}
define <8 x i16> @vp_cttz_zero_undef_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v8i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 true, <8 x i1> %m, i32 %evl)
ret <8 x i16> %v
}
define <8 x i16> @vp_cttz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v8i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v8i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 true, <8 x i1> %m, i32 %evl)
@@ -3260,121 +2636,65 @@ define <8 x i16> @vp_cttz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %
}
define <16 x i16> @vp_cttz_zero_undef_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v16i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v16i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: ret
%v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 true, <16 x i1> %m, i32 %evl)
ret <16 x i16> %v
}
define <16 x i16> @vp_cttz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v16i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v16i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsub.vx v10, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 3
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: li a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 8
+; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 true, <16 x i1> %m, i32 %evl)
@@ -3382,251 +2702,135 @@ define <16 x i16> @vp_cttz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroex
}
define <2 x i32> @vp_cttz_zero_undef_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
%v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl)
ret <2 x i32> %v
}
define <2 x i32> @vp_cttz_zero_undef_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v2i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v2i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl)
- ret <2 x i32> %v
-}
-
-define <4 x i32> @vp_cttz_zero_undef_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <4 x i32> @vp_cttz_zero_undef_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
%v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 true, <4 x i1> %m, i32 %evl)
ret <4 x i32> %v
}
define <4 x i32> @vp_cttz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v4i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v4i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsub.vx v9, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v9, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: vsrl.vi v9, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 true, <4 x i1> %m, i32 %evl)
@@ -3634,125 +2838,67 @@ define <4 x i32> @vp_cttz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %
}
define <8 x i32> @vp_cttz_zero_undef_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
%v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 true, <8 x i1> %m, i32 %evl)
ret <8 x i32> %v
}
define <8 x i32> @vp_cttz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v8i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v8i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsub.vx v10, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v10
+; CHECK-NEXT: vsrl.vi v10, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v10, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vsrl.vi v10, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 true, <8 x i1> %m, i32 %evl)
@@ -3760,125 +2906,67 @@ define <8 x i32> @vp_cttz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext %
}
define <16 x i32> @vp_cttz_zero_undef_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsub.vx v12, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsub.vx v12, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t
+; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t
+; CHECK-NEXT: ret
%v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 true, <16 x i1> %m, i32 %evl)
ret <16 x i32> %v
}
define <16 x i32> @vp_cttz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_v16i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsub.vx v12, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_v16i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsub.vx v12, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsub.vx v12, v8, a1
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vand.vv v8, v8, v12
+; CHECK-NEXT: vsrl.vi v12, v8, 1
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addi a0, a0, 1365
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: vsub.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 209715
+; CHECK-NEXT: addi a0, a0, 819
+; CHECK-NEXT: vand.vx v12, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vadd.vv v8, v12, v8
+; CHECK-NEXT: vsrl.vi v12, v8, 4
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: lui a0, 61681
+; CHECK-NEXT: addi a0, a0, -241
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 4112
+; CHECK-NEXT: addi a0, a0, 257
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 24
+; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 true, <16 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
index 83774704f9ca911..1b422730f2ac7b1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -113,11 +113,11 @@ define void @cttz_v8i16(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1
; LMULMAX2-RV64I-NEXT: lui a1, 5
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365
; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1
; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: lui a1, 3
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 819
; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
@@ -125,7 +125,7 @@ define void @cttz_v8i16(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4
; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: lui a1, 1
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT: addi a1, a1, -241
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: li a1, 257
; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
@@ -133,65 +133,35 @@ define void @cttz_v8i16(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vse16.v v8, (a0)
; LMULMAX2-RV64I-NEXT: ret
;
-; LMULMAX1-RV32-LABEL: cttz_v8i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: li a1, 1
-; LMULMAX1-RV32-NEXT: vsub.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT: vnot.v v8, v8
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT: lui a1, 5
-; LMULMAX1-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 3
-; LMULMAX1-RV32-NEXT: addi a1, a1, 819
-; LMULMAX1-RV32-NEXT: vand.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 1
-; LMULMAX1-RV32-NEXT: addi a1, a1, -241
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: li a1, 257
-; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: cttz_v8i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: li a1, 1
-; LMULMAX1-RV64-NEXT: vsub.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT: vnot.v v8, v8
-; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT: lui a1, 5
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: lui a1, 3
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: lui a1, 1
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: li a1, 257
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX1-LABEL: cttz_v8i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX1-NEXT: vle16.v v8, (a0)
+; LMULMAX1-NEXT: li a1, 1
+; LMULMAX1-NEXT: vsub.vx v9, v8, a1
+; LMULMAX1-NEXT: vnot.v v8, v8
+; LMULMAX1-NEXT: vand.vv v8, v8, v9
+; LMULMAX1-NEXT: vsrl.vi v9, v8, 1
+; LMULMAX1-NEXT: lui a1, 5
+; LMULMAX1-NEXT: addi a1, a1, 1365
+; LMULMAX1-NEXT: vand.vx v9, v9, a1
+; LMULMAX1-NEXT: vsub.vv v8, v8, v9
+; LMULMAX1-NEXT: lui a1, 3
+; LMULMAX1-NEXT: addi a1, a1, 819
+; LMULMAX1-NEXT: vand.vx v9, v8, a1
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT: vand.vx v8, v8, a1
+; LMULMAX1-NEXT: vadd.vv v8, v9, v8
+; LMULMAX1-NEXT: vsrl.vi v9, v8, 4
+; LMULMAX1-NEXT: vadd.vv v8, v8, v9
+; LMULMAX1-NEXT: lui a1, 1
+; LMULMAX1-NEXT: addi a1, a1, -241
+; LMULMAX1-NEXT: vand.vx v8, v8, a1
+; LMULMAX1-NEXT: li a1, 257
+; LMULMAX1-NEXT: vmul.vx v8, v8, a1
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 8
+; LMULMAX1-NEXT: vse16.v v8, (a0)
+; LMULMAX1-NEXT: ret
;
; LMULMAX2-RV32F-LABEL: cttz_v8i16:
; LMULMAX2-RV32F: # %bb.0:
@@ -330,11 +300,11 @@ define void @cttz_v4i32(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1
; LMULMAX2-RV64I-NEXT: lui a1, 349525
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365
; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1
; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: lui a1, 209715
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 819
; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
@@ -342,10 +312,10 @@ define void @cttz_v4i32(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4
; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: lui a1, 61681
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT: addi a1, a1, -241
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: lui a1, 4112
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 257
; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24
; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0)
@@ -752,161 +722,83 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind {
declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)
define void @cttz_v16i16(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32-LABEL: cttz_v16i16:
-; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX2-RV32-NEXT: li a1, 1
-; LMULMAX2-RV32-NEXT: vsub.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT: vnot.v v8, v8
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT: lui a1, 5
-; LMULMAX2-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 3
-; LMULMAX2-RV32-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32-NEXT: vand.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 1
-; LMULMAX2-RV32-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: li a1, 257
-; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX2-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT: ret
-;
-; LMULMAX2-RV64-LABEL: cttz_v16i16:
-; LMULMAX2-RV64: # %bb.0:
-; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX2-RV64-NEXT: li a1, 1
-; LMULMAX2-RV64-NEXT: vsub.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT: vnot.v v8, v8
-; LMULMAX2-RV64-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT: lui a1, 5
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, 3
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, 1
-; LMULMAX2-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: li a1, 257
-; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX2-RV64-NEXT: vse16.v v8, (a0)
-; LMULMAX2-RV64-NEXT: ret
-;
-; LMULMAX1-RV32-LABEL: cttz_v16i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a1)
-; LMULMAX1-RV32-NEXT: vle16.v v9, (a0)
-; LMULMAX1-RV32-NEXT: li a2, 1
-; LMULMAX1-RV32-NEXT: vsub.vx v10, v8, a2
-; LMULMAX1-RV32-NEXT: vnot.v v8, v8
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT: lui a3, 5
-; LMULMAX1-RV32-NEXT: addi a3, a3, 1365
-; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: lui a4, 3
-; LMULMAX1-RV32-NEXT: addi a4, a4, 819
-; LMULMAX1-RV32-NEXT: vand.vx v10, v8, a4
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: lui a5, 1
-; LMULMAX1-RV32-NEXT: addi a5, a5, -241
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a5
-; LMULMAX1-RV32-NEXT: li a6, 257
-; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a6
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT: vsub.vx v10, v9, a2
-; LMULMAX1-RV32-NEXT: vnot.v v9, v9
-; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vand.vx v10, v9, a4
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a5
-; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a6
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 8
-; LMULMAX1-RV32-NEXT: vse16.v v9, (a0)
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a1)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: cttz_v16i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a1)
-; LMULMAX1-RV64-NEXT: vle16.v v9, (a0)
-; LMULMAX1-RV64-NEXT: li a2, 1
-; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a2
-; LMULMAX1-RV64-NEXT: vnot.v v8, v8
-; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT: lui a3, 5
-; LMULMAX1-RV64-NEXT: addiw a3, a3, 1365
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: lui a4, 3
-; LMULMAX1-RV64-NEXT: addiw a4, a4, 819
-; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a4
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: lui a5, 1
-; LMULMAX1-RV64-NEXT: addiw a5, a5, -241
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT: li a6, 257
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a6
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a2
-; LMULMAX1-RV64-NEXT: vnot.v v9, v9
-; LMULMAX1-RV64-NEXT: vand.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a4
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a6
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 8
-; LMULMAX1-RV64-NEXT: vse16.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vse16.v v8, (a1)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX2-LABEL: cttz_v16i16:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; LMULMAX2-NEXT: vle16.v v8, (a0)
+; LMULMAX2-NEXT: li a1, 1
+; LMULMAX2-NEXT: vsub.vx v10, v8, a1
+; LMULMAX2-NEXT: vnot.v v8, v8
+; LMULMAX2-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-NEXT: lui a1, 5
+; LMULMAX2-NEXT: addi a1, a1, 1365
+; LMULMAX2-NEXT: vand.vx v10, v10, a1
+; LMULMAX2-NEXT: vsub.vv v8, v8, v10
+; LMULMAX2-NEXT: lui a1, 3
+; LMULMAX2-NEXT: addi a1, a1, 819
+; LMULMAX2-NEXT: vand.vx v10, v8, a1
+; LMULMAX2-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-NEXT: lui a1, 1
+; LMULMAX2-NEXT: addi a1, a1, -241
+; LMULMAX2-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-NEXT: li a1, 257
+; LMULMAX2-NEXT: vmul.vx v8, v8, a1
+; LMULMAX2-NEXT: vsrl.vi v8, v8, 8
+; LMULMAX2-NEXT: vse16.v v8, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-LABEL: cttz_v16i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: vle16.v v8, (a1)
+; LMULMAX1-NEXT: vle16.v v9, (a0)
+; LMULMAX1-NEXT: li a2, 1
+; LMULMAX1-NEXT: vsub.vx v10, v8, a2
+; LMULMAX1-NEXT: vnot.v v8, v8
+; LMULMAX1-NEXT: vand.vv v8, v8, v10
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX1-NEXT: lui a3, 5
+; LMULMAX1-NEXT: addi a3, a3, 1365
+; LMULMAX1-NEXT: vand.vx v10, v10, a3
+; LMULMAX1-NEXT: vsub.vv v8, v8, v10
+; LMULMAX1-NEXT: lui a4, 3
+; LMULMAX1-NEXT: addi a4, a4, 819
+; LMULMAX1-NEXT: vand.vx v10, v8, a4
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT: vand.vx v8, v8, a4
+; LMULMAX1-NEXT: vadd.vv v8, v10, v8
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX1-NEXT: vadd.vv v8, v8, v10
+; LMULMAX1-NEXT: lui a5, 1
+; LMULMAX1-NEXT: addi a5, a5, -241
+; LMULMAX1-NEXT: vand.vx v8, v8, a5
+; LMULMAX1-NEXT: li a6, 257
+; LMULMAX1-NEXT: vmul.vx v8, v8, a6
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 8
+; LMULMAX1-NEXT: vsub.vx v10, v9, a2
+; LMULMAX1-NEXT: vnot.v v9, v9
+; LMULMAX1-NEXT: vand.vv v9, v9, v10
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 1
+; LMULMAX1-NEXT: vand.vx v10, v10, a3
+; LMULMAX1-NEXT: vsub.vv v9, v9, v10
+; LMULMAX1-NEXT: vand.vx v10, v9, a4
+; LMULMAX1-NEXT: vsrl.vi v9, v9, 2
+; LMULMAX1-NEXT: vand.vx v9, v9, a4
+; LMULMAX1-NEXT: vadd.vv v9, v10, v9
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 4
+; LMULMAX1-NEXT: vadd.vv v9, v9, v10
+; LMULMAX1-NEXT: vand.vx v9, v9, a5
+; LMULMAX1-NEXT: vmul.vx v9, v9, a6
+; LMULMAX1-NEXT: vsrl.vi v9, v9, 8
+; LMULMAX1-NEXT: vse16.v v9, (a0)
+; LMULMAX1-NEXT: vse16.v v8, (a1)
+; LMULMAX1-NEXT: ret
;
; LMULMAX8-LABEL: cttz_v16i16:
; LMULMAX8: # %bb.0:
@@ -981,11 +873,11 @@ define void @cttz_v8i32(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v10
; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1
; LMULMAX2-RV64I-NEXT: lui a1, 349525
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365
; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1
; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10
; LMULMAX2-RV64I-NEXT: lui a1, 209715
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 819
; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
@@ -993,10 +885,10 @@ define void @cttz_v8i32(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4
; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10
; LMULMAX2-RV64I-NEXT: lui a1, 61681
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT: addi a1, a1, -241
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: lui a1, 4112
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 257
; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24
; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0)
@@ -1400,11 +1292,11 @@ define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1
; LMULMAX2-RV64I-NEXT: lui a1, 5
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365
; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1
; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: lui a1, 3
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 819
; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
@@ -1412,7 +1304,7 @@ define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4
; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: lui a1, 1
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT: addi a1, a1, -241
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: li a1, 257
; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
@@ -1420,65 +1312,35 @@ define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vse16.v v8, (a0)
; LMULMAX2-RV64I-NEXT: ret
;
-; LMULMAX1-RV32-LABEL: cttz_zero_undef_v8i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: li a1, 1
-; LMULMAX1-RV32-NEXT: vsub.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT: vnot.v v8, v8
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT: lui a1, 5
-; LMULMAX1-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 3
-; LMULMAX1-RV32-NEXT: addi a1, a1, 819
-; LMULMAX1-RV32-NEXT: vand.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: lui a1, 1
-; LMULMAX1-RV32-NEXT: addi a1, a1, -241
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: li a1, 257
-; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: cttz_zero_undef_v8i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: li a1, 1
-; LMULMAX1-RV64-NEXT: vsub.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT: vnot.v v8, v8
-; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT: lui a1, 5
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: lui a1, 3
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: lui a1, 1
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: li a1, 257
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX1-LABEL: cttz_zero_undef_v8i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX1-NEXT: vle16.v v8, (a0)
+; LMULMAX1-NEXT: li a1, 1
+; LMULMAX1-NEXT: vsub.vx v9, v8, a1
+; LMULMAX1-NEXT: vnot.v v8, v8
+; LMULMAX1-NEXT: vand.vv v8, v8, v9
+; LMULMAX1-NEXT: vsrl.vi v9, v8, 1
+; LMULMAX1-NEXT: lui a1, 5
+; LMULMAX1-NEXT: addi a1, a1, 1365
+; LMULMAX1-NEXT: vand.vx v9, v9, a1
+; LMULMAX1-NEXT: vsub.vv v8, v8, v9
+; LMULMAX1-NEXT: lui a1, 3
+; LMULMAX1-NEXT: addi a1, a1, 819
+; LMULMAX1-NEXT: vand.vx v9, v8, a1
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT: vand.vx v8, v8, a1
+; LMULMAX1-NEXT: vadd.vv v8, v9, v8
+; LMULMAX1-NEXT: vsrl.vi v9, v8, 4
+; LMULMAX1-NEXT: vadd.vv v8, v8, v9
+; LMULMAX1-NEXT: lui a1, 1
+; LMULMAX1-NEXT: addi a1, a1, -241
+; LMULMAX1-NEXT: vand.vx v8, v8, a1
+; LMULMAX1-NEXT: li a1, 257
+; LMULMAX1-NEXT: vmul.vx v8, v8, a1
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 8
+; LMULMAX1-NEXT: vse16.v v8, (a0)
+; LMULMAX1-NEXT: ret
;
; LMULMAX2-RV32F-LABEL: cttz_zero_undef_v8i16:
; LMULMAX2-RV32F: # %bb.0:
@@ -1601,11 +1463,11 @@ define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1
; LMULMAX2-RV64I-NEXT: lui a1, 349525
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365
; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1
; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: lui a1, 209715
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 819
; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
@@ -1613,10 +1475,10 @@ define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4
; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9
; LMULMAX2-RV64I-NEXT: lui a1, 61681
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT: addi a1, a1, -241
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: lui a1, 4112
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 257
; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24
; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0)
@@ -1987,161 +1849,83 @@ define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
}
define void @cttz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32-LABEL: cttz_zero_undef_v16i16:
-; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX2-RV32-NEXT: li a1, 1
-; LMULMAX2-RV32-NEXT: vsub.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT: vnot.v v8, v8
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT: lui a1, 5
-; LMULMAX2-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 3
-; LMULMAX2-RV32-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32-NEXT: vand.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 1
-; LMULMAX2-RV32-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: li a1, 257
-; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX2-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT: ret
-;
-; LMULMAX2-RV64-LABEL: cttz_zero_undef_v16i16:
-; LMULMAX2-RV64: # %bb.0:
-; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX2-RV64-NEXT: li a1, 1
-; LMULMAX2-RV64-NEXT: vsub.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT: vnot.v v8, v8
-; LMULMAX2-RV64-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT: lui a1, 5
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, 3
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, 1
-; LMULMAX2-RV64-NEXT: addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: li a1, 257
-; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX2-RV64-NEXT: vse16.v v8, (a0)
-; LMULMAX2-RV64-NEXT: ret
-;
-; LMULMAX1-RV32-LABEL: cttz_zero_undef_v16i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a1)
-; LMULMAX1-RV32-NEXT: vle16.v v9, (a0)
-; LMULMAX1-RV32-NEXT: li a2, 1
-; LMULMAX1-RV32-NEXT: vsub.vx v10, v8, a2
-; LMULMAX1-RV32-NEXT: vnot.v v8, v8
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT: lui a3, 5
-; LMULMAX1-RV32-NEXT: addi a3, a3, 1365
-; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: lui a4, 3
-; LMULMAX1-RV32-NEXT: addi a4, a4, 819
-; LMULMAX1-RV32-NEXT: vand.vx v10, v8, a4
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: lui a5, 1
-; LMULMAX1-RV32-NEXT: addi a5, a5, -241
-; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a5
-; LMULMAX1-RV32-NEXT: li a6, 257
-; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a6
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT: vsub.vx v10, v9, a2
-; LMULMAX1-RV32-NEXT: vnot.v v9, v9
-; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vand.vx v10, v9, a4
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a5
-; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a6
-; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 8
-; LMULMAX1-RV32-NEXT: vse16.v v9, (a0)
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a1)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: cttz_zero_undef_v16i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a1)
-; LMULMAX1-RV64-NEXT: vle16.v v9, (a0)
-; LMULMAX1-RV64-NEXT: li a2, 1
-; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a2
-; LMULMAX1-RV64-NEXT: vnot.v v8, v8
-; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT: lui a3, 5
-; LMULMAX1-RV64-NEXT: addiw a3, a3, 1365
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: lui a4, 3
-; LMULMAX1-RV64-NEXT: addiw a4, a4, 819
-; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a4
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: lui a5, 1
-; LMULMAX1-RV64-NEXT: addiw a5, a5, -241
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT: li a6, 257
-; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a6
-; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a2
-; LMULMAX1-RV64-NEXT: vnot.v v9, v9
-; LMULMAX1-RV64-NEXT: vand.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a4
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a6
-; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 8
-; LMULMAX1-RV64-NEXT: vse16.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vse16.v v8, (a1)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX2-LABEL: cttz_zero_undef_v16i16:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; LMULMAX2-NEXT: vle16.v v8, (a0)
+; LMULMAX2-NEXT: li a1, 1
+; LMULMAX2-NEXT: vsub.vx v10, v8, a1
+; LMULMAX2-NEXT: vnot.v v8, v8
+; LMULMAX2-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-NEXT: lui a1, 5
+; LMULMAX2-NEXT: addi a1, a1, 1365
+; LMULMAX2-NEXT: vand.vx v10, v10, a1
+; LMULMAX2-NEXT: vsub.vv v8, v8, v10
+; LMULMAX2-NEXT: lui a1, 3
+; LMULMAX2-NEXT: addi a1, a1, 819
+; LMULMAX2-NEXT: vand.vx v10, v8, a1
+; LMULMAX2-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-NEXT: lui a1, 1
+; LMULMAX2-NEXT: addi a1, a1, -241
+; LMULMAX2-NEXT: vand.vx v8, v8, a1
+; LMULMAX2-NEXT: li a1, 257
+; LMULMAX2-NEXT: vmul.vx v8, v8, a1
+; LMULMAX2-NEXT: vsrl.vi v8, v8, 8
+; LMULMAX2-NEXT: vse16.v v8, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-LABEL: cttz_zero_undef_v16i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: vle16.v v8, (a1)
+; LMULMAX1-NEXT: vle16.v v9, (a0)
+; LMULMAX1-NEXT: li a2, 1
+; LMULMAX1-NEXT: vsub.vx v10, v8, a2
+; LMULMAX1-NEXT: vnot.v v8, v8
+; LMULMAX1-NEXT: vand.vv v8, v8, v10
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX1-NEXT: lui a3, 5
+; LMULMAX1-NEXT: addi a3, a3, 1365
+; LMULMAX1-NEXT: vand.vx v10, v10, a3
+; LMULMAX1-NEXT: vsub.vv v8, v8, v10
+; LMULMAX1-NEXT: lui a4, 3
+; LMULMAX1-NEXT: addi a4, a4, 819
+; LMULMAX1-NEXT: vand.vx v10, v8, a4
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT: vand.vx v8, v8, a4
+; LMULMAX1-NEXT: vadd.vv v8, v10, v8
+; LMULMAX1-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX1-NEXT: vadd.vv v8, v8, v10
+; LMULMAX1-NEXT: lui a5, 1
+; LMULMAX1-NEXT: addi a5, a5, -241
+; LMULMAX1-NEXT: vand.vx v8, v8, a5
+; LMULMAX1-NEXT: li a6, 257
+; LMULMAX1-NEXT: vmul.vx v8, v8, a6
+; LMULMAX1-NEXT: vsrl.vi v8, v8, 8
+; LMULMAX1-NEXT: vsub.vx v10, v9, a2
+; LMULMAX1-NEXT: vnot.v v9, v9
+; LMULMAX1-NEXT: vand.vv v9, v9, v10
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 1
+; LMULMAX1-NEXT: vand.vx v10, v10, a3
+; LMULMAX1-NEXT: vsub.vv v9, v9, v10
+; LMULMAX1-NEXT: vand.vx v10, v9, a4
+; LMULMAX1-NEXT: vsrl.vi v9, v9, 2
+; LMULMAX1-NEXT: vand.vx v9, v9, a4
+; LMULMAX1-NEXT: vadd.vv v9, v10, v9
+; LMULMAX1-NEXT: vsrl.vi v10, v9, 4
+; LMULMAX1-NEXT: vadd.vv v9, v9, v10
+; LMULMAX1-NEXT: vand.vx v9, v9, a5
+; LMULMAX1-NEXT: vmul.vx v9, v9, a6
+; LMULMAX1-NEXT: vsrl.vi v9, v9, 8
+; LMULMAX1-NEXT: vse16.v v9, (a0)
+; LMULMAX1-NEXT: vse16.v v8, (a1)
+; LMULMAX1-NEXT: ret
;
; LMULMAX8-LABEL: cttz_zero_undef_v16i16:
; LMULMAX8: # %bb.0:
@@ -2212,11 +1996,11 @@ define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v10
; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1
; LMULMAX2-RV64I-NEXT: lui a1, 349525
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365
; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1
; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10
; LMULMAX2-RV64I-NEXT: lui a1, 209715
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 819
; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
@@ -2224,10 +2008,10 @@ define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4
; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10
; LMULMAX2-RV64I-NEXT: lui a1, 61681
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241
+; LMULMAX2-RV64I-NEXT: addi a1, a1, -241
; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: lui a1, 4112
-; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257
+; LMULMAX2-RV64I-NEXT: addi a1, a1, 257
; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1
; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24
; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0)
@@ -2501,3 +2285,8 @@ define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
store <4 x i64> %c, ptr %x
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; LMULMAX1-RV32: {{.*}}
+; LMULMAX1-RV64: {{.*}}
+; LMULMAX2-RV32: {{.*}}
+; LMULMAX2-RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
index 5605437443d76bb..95c1beb284c4003 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -997,7 +997,7 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
; RV64NOM-NEXT: vand.vv v8, v8, v11
; RV64NOM-NEXT: vadd.vv v8, v9, v8
; RV64NOM-NEXT: lui a0, 12320
-; RV64NOM-NEXT: addiw a0, a0, 257
+; RV64NOM-NEXT: addi a0, a0, 257
; RV64NOM-NEXT: vmv.s.x v9, a0
; RV64NOM-NEXT: vsext.vf4 v10, v9
; RV64NOM-NEXT: vsra.vv v8, v8, v10
@@ -1053,7 +1053,7 @@ define i32 @extractelt_udiv_v4i32(<4 x i32> %x) {
; RV64NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64NOM-NEXT: vsrl.vi v8, v8, 0
; RV64NOM-NEXT: lui a0, 322639
-; RV64NOM-NEXT: addiw a0, a0, -945
+; RV64NOM-NEXT: addi a0, a0, -945
; RV64NOM-NEXT: vmulhu.vx v8, v8, a0
; RV64NOM-NEXT: vslidedown.vi v8, v8, 2
; RV64NOM-NEXT: vmv.x.s a0, v8
@@ -1064,7 +1064,7 @@ define i32 @extractelt_udiv_v4i32(<4 x i32> %x) {
; RV64M-LABEL: extractelt_udiv_v4i32:
; RV64M: # %bb.0:
; RV64M-NEXT: lui a0, 322639
-; RV64M-NEXT: addiw a0, a0, -945
+; RV64M-NEXT: addi a0, a0, -945
; RV64M-NEXT: slli a0, a0, 32
; RV64M-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64M-NEXT: vslidedown.vi v8, v8, 2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
index c1a4aa4b05d4bec..ea818df7329c7d1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -236,79 +236,42 @@ define <64 x half> @interleave_v32f16(<32 x half> %x, <32 x half> %y) {
}
define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
-; RV32-V128-LABEL: interleave_v32f32:
-; RV32-V128: # %bb.0:
-; RV32-V128-NEXT: addi sp, sp, -16
-; RV32-V128-NEXT: .cfi_def_cfa_offset 16
-; RV32-V128-NEXT: csrr a0, vlenb
-; RV32-V128-NEXT: slli a0, a0, 2
-; RV32-V128-NEXT: sub sp, sp, a0
-; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; RV32-V128-NEXT: lui a0, %hi(.LCPI10_0)
-; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI10_0)
-; RV32-V128-NEXT: li a1, 32
-; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu
-; RV32-V128-NEXT: vle16.v v4, (a0)
-; RV32-V128-NEXT: lui a0, %hi(.LCPI10_1)
-; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI10_1)
-; RV32-V128-NEXT: vle16.v v24, (a0)
-; RV32-V128-NEXT: addi a0, sp, 16
-; RV32-V128-NEXT: vs4r.v v24, (a0) # Unknown-size Folded Spill
-; RV32-V128-NEXT: lui a0, 699051
-; RV32-V128-NEXT: addi a0, a0, -1366
-; RV32-V128-NEXT: vmv.s.x v0, a0
-; RV32-V128-NEXT: vrgatherei16.vv v24, v8, v4
-; RV32-V128-NEXT: addi a0, sp, 16
-; RV32-V128-NEXT: vl4r.v v12, (a0) # Unknown-size Folded Reload
-; RV32-V128-NEXT: vrgatherei16.vv v24, v16, v12, v0.t
-; RV32-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-V128-NEXT: vwaddu.vv v0, v8, v16
-; RV32-V128-NEXT: li a0, -1
-; RV32-V128-NEXT: vwmaccu.vx v0, a0, v16
-; RV32-V128-NEXT: vmv8r.v v8, v0
-; RV32-V128-NEXT: vmv8r.v v16, v24
-; RV32-V128-NEXT: csrr a0, vlenb
-; RV32-V128-NEXT: slli a0, a0, 2
-; RV32-V128-NEXT: add sp, sp, a0
-; RV32-V128-NEXT: addi sp, sp, 16
-; RV32-V128-NEXT: ret
-;
-; RV64-V128-LABEL: interleave_v32f32:
-; RV64-V128: # %bb.0:
-; RV64-V128-NEXT: addi sp, sp, -16
-; RV64-V128-NEXT: .cfi_def_cfa_offset 16
-; RV64-V128-NEXT: csrr a0, vlenb
-; RV64-V128-NEXT: slli a0, a0, 2
-; RV64-V128-NEXT: sub sp, sp, a0
-; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; RV64-V128-NEXT: lui a0, %hi(.LCPI10_0)
-; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI10_0)
-; RV64-V128-NEXT: li a1, 32
-; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu
-; RV64-V128-NEXT: vle16.v v4, (a0)
-; RV64-V128-NEXT: lui a0, %hi(.LCPI10_1)
-; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI10_1)
-; RV64-V128-NEXT: vle16.v v24, (a0)
-; RV64-V128-NEXT: addi a0, sp, 16
-; RV64-V128-NEXT: vs4r.v v24, (a0) # Unknown-size Folded Spill
-; RV64-V128-NEXT: lui a0, 699051
-; RV64-V128-NEXT: addiw a0, a0, -1366
-; RV64-V128-NEXT: vmv.s.x v0, a0
-; RV64-V128-NEXT: vrgatherei16.vv v24, v8, v4
-; RV64-V128-NEXT: addi a0, sp, 16
-; RV64-V128-NEXT: vl4r.v v12, (a0) # Unknown-size Folded Reload
-; RV64-V128-NEXT: vrgatherei16.vv v24, v16, v12, v0.t
-; RV64-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV64-V128-NEXT: vwaddu.vv v0, v8, v16
-; RV64-V128-NEXT: li a0, -1
-; RV64-V128-NEXT: vwmaccu.vx v0, a0, v16
-; RV64-V128-NEXT: vmv8r.v v8, v0
-; RV64-V128-NEXT: vmv8r.v v16, v24
-; RV64-V128-NEXT: csrr a0, vlenb
-; RV64-V128-NEXT: slli a0, a0, 2
-; RV64-V128-NEXT: add sp, sp, a0
-; RV64-V128-NEXT: addi sp, sp, 16
-; RV64-V128-NEXT: ret
+; V128-LABEL: interleave_v32f32:
+; V128: # %bb.0:
+; V128-NEXT: addi sp, sp, -16
+; V128-NEXT: .cfi_def_cfa_offset 16
+; V128-NEXT: csrr a0, vlenb
+; V128-NEXT: slli a0, a0, 2
+; V128-NEXT: sub sp, sp, a0
+; V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; V128-NEXT: lui a0, %hi(.LCPI10_0)
+; V128-NEXT: addi a0, a0, %lo(.LCPI10_0)
+; V128-NEXT: li a1, 32
+; V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu
+; V128-NEXT: vle16.v v4, (a0)
+; V128-NEXT: lui a0, %hi(.LCPI10_1)
+; V128-NEXT: addi a0, a0, %lo(.LCPI10_1)
+; V128-NEXT: vle16.v v24, (a0)
+; V128-NEXT: addi a0, sp, 16
+; V128-NEXT: vs4r.v v24, (a0) # Unknown-size Folded Spill
+; V128-NEXT: lui a0, 699051
+; V128-NEXT: addi a0, a0, -1366
+; V128-NEXT: vmv.s.x v0, a0
+; V128-NEXT: vrgatherei16.vv v24, v8, v4
+; V128-NEXT: addi a0, sp, 16
+; V128-NEXT: vl4r.v v12, (a0) # Unknown-size Folded Reload
+; V128-NEXT: vrgatherei16.vv v24, v16, v12, v0.t
+; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; V128-NEXT: vwaddu.vv v0, v8, v16
+; V128-NEXT: li a0, -1
+; V128-NEXT: vwmaccu.vx v0, a0, v16
+; V128-NEXT: vmv8r.v v8, v0
+; V128-NEXT: vmv8r.v v16, v24
+; V128-NEXT: csrr a0, vlenb
+; V128-NEXT: slli a0, a0, 2
+; V128-NEXT: add sp, sp, a0
+; V128-NEXT: addi sp, sp, 16
+; V128-NEXT: ret
;
; V512-LABEL: interleave_v32f32:
; V512: # %bb.0:
@@ -375,31 +338,18 @@ define <4 x float> @unary_interleave_v4f32(<4 x float> %x) {
; FIXME: Is there better codegen we can do here?
define <4 x double> @unary_interleave_v4f64(<4 x double> %x) {
-; RV32-V128-LABEL: unary_interleave_v4f64:
-; RV32-V128: # %bb.0:
-; RV32-V128-NEXT: lui a0, 12304
-; RV32-V128-NEXT: addi a0, a0, 512
-; RV32-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-V128-NEXT: vmv.s.x v10, a0
-; RV32-V128-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-V128-NEXT: vsext.vf2 v12, v10
-; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v12
-; RV32-V128-NEXT: vmv.v.v v8, v10
-; RV32-V128-NEXT: ret
-;
-; RV64-V128-LABEL: unary_interleave_v4f64:
-; RV64-V128: # %bb.0:
-; RV64-V128-NEXT: lui a0, 12304
-; RV64-V128-NEXT: addiw a0, a0, 512
-; RV64-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-V128-NEXT: vmv.s.x v10, a0
-; RV64-V128-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-V128-NEXT: vsext.vf2 v12, v10
-; RV64-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-V128-NEXT: vrgatherei16.vv v10, v8, v12
-; RV64-V128-NEXT: vmv.v.v v8, v10
-; RV64-V128-NEXT: ret
+; V128-LABEL: unary_interleave_v4f64:
+; V128: # %bb.0:
+; V128-NEXT: lui a0, 12304
+; V128-NEXT: addi a0, a0, 512
+; V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; V128-NEXT: vmv.s.x v10, a0
+; V128-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; V128-NEXT: vsext.vf2 v12, v10
+; V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; V128-NEXT: vrgatherei16.vv v10, v8, v12
+; V128-NEXT: vmv.v.v v8, v10
+; V128-NEXT: ret
;
; RV32-V512-LABEL: unary_interleave_v4f64:
; RV32-V512: # %bb.0:
@@ -417,7 +367,7 @@ define <4 x double> @unary_interleave_v4f64(<4 x double> %x) {
; RV64-V512-LABEL: unary_interleave_v4f64:
; RV64-V512: # %bb.0:
; RV64-V512-NEXT: lui a0, 12304
-; RV64-V512-NEXT: addiw a0, a0, 512
+; RV64-V512-NEXT: addi a0, a0, 512
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; RV64-V512-NEXT: vmv.s.x v9, a0
; RV64-V512-NEXT: vsext.vf8 v10, v9
@@ -477,3 +427,6 @@ define <8 x float> @unary_interleave_v8f32(<8 x float> %x) {
%a = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 4, i32 0, i32 undef, i32 1, i32 6, i32 undef, i32 7, i32 3>
ret <8 x float> %a
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32-V128: {{.*}}
+; RV64-V128: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
index a7852ea5843d8a0..59eb4b89a2f56e8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -55,61 +55,35 @@ define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) {
}
define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) {
-; RV32-LABEL: vrgather_permute_shuffle_vu_v4f64:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 4096
-; RV32-NEXT: addi a0, a0, 513
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v10, a0
-; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v10
-; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT: vrgatherei16.vv v10, v8, v12
-; RV32-NEXT: vmv.v.v v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vrgather_permute_shuffle_vu_v4f64:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 4096
-; RV64-NEXT: addiw a0, a0, 513
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT: vsext.vf2 v12, v10
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vrgatherei16.vv v10, v8, v12
-; RV64-NEXT: vmv.v.v v8, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 4096
+; CHECK-NEXT: addi a0, a0, 513
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsext.vf2 v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
%s = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
ret <4 x double> %s
}
define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) {
-; RV32-LABEL: vrgather_permute_shuffle_uv_v4f64:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 4096
-; RV32-NEXT: addi a0, a0, 513
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v10, a0
-; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v10
-; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT: vrgatherei16.vv v10, v8, v12
-; RV32-NEXT: vmv.v.v v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vrgather_permute_shuffle_uv_v4f64:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 4096
-; RV64-NEXT: addiw a0, a0, 513
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT: vsext.vf2 v12, v10
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vrgatherei16.vv v10, v8, v12
-; RV64-NEXT: vmv.v.v v8, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: vrgather_permute_shuffle_uv_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 4096
+; CHECK-NEXT: addi a0, a0, 513
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsext.vf2 v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
%s = shufflevector <4 x double> poison, <4 x double> %x, <4 x i32> <i32 5, i32 6, i32 4, i32 5>
ret <4 x double> %s
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index b648420aa2e03cb..e9412019a0dec8e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -324,40 +324,24 @@ define <4 x i64> @buildvec_vid_step2_add0_v4i64() {
}
define <4 x i8> @buildvec_no_vid_v4i8_0() {
-; RV32-LABEL: buildvec_no_vid_v4i8_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 28768
-; RV32-NEXT: addi a0, a0, 769
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: buildvec_no_vid_v4i8_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 28768
-; RV64-NEXT: addiw a0, a0, 769
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: buildvec_no_vid_v4i8_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 28768
+; CHECK-NEXT: addi a0, a0, 769
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: ret
ret <4 x i8> <i8 1, i8 3, i8 6, i8 7>
}
define <4 x i8> @buildvec_no_vid_v4i8_1() {
-; RV32-LABEL: buildvec_no_vid_v4i8_1:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 28752
-; RV32-NEXT: addi a0, a0, 512
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: buildvec_no_vid_v4i8_1:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 28752
-; RV64-NEXT: addiw a0, a0, 512
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: buildvec_no_vid_v4i8_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 28752
+; CHECK-NEXT: addi a0, a0, 512
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: ret
ret <4 x i8> <i8 undef, i8 2, i8 5, i8 7>
}
@@ -372,21 +356,13 @@ define <4 x i8> @buildvec_no_vid_v4i8_2() {
}
define <4 x i8> @buildvec_no_vid_v4i8_3() {
-; RV32-LABEL: buildvec_no_vid_v4i8_3:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 28672
-; RV32-NEXT: addi a0, a0, 255
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: buildvec_no_vid_v4i8_3:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 28672
-; RV64-NEXT: addiw a0, a0, 255
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: buildvec_no_vid_v4i8_3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 28672
+; CHECK-NEXT: addi a0, a0, 255
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: ret
ret <4 x i8> <i8 -1, i8 undef, i8 undef, i8 7>
}
@@ -400,21 +376,13 @@ define <4 x i8> @buildvec_no_vid_v4i8_4() {
}
define <4 x i8> @buildvec_no_vid_v4i8_5() {
-; RV32-LABEL: buildvec_no_vid_v4i8_5:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 1032144
-; RV32-NEXT: addi a0, a0, -257
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: buildvec_no_vid_v4i8_5:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 1032144
-; RV64-NEXT: addiw a0, a0, -257
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: buildvec_no_vid_v4i8_5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 1032144
+; CHECK-NEXT: addi a0, a0, -257
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: ret
ret <4 x i8> <i8 -1, i8 -2, i8 -4, i8 -5>
}
@@ -529,25 +497,15 @@ define void @buildvec_seq_v8i8_v4i16(ptr %x) {
}
define void @buildvec_seq_v8i8_v2i32(ptr %x) {
-; RV32-LABEL: buildvec_seq_v8i8_v2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a1, 48
-; RV32-NEXT: addi a1, a1, 513
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a1
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vse8.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: buildvec_seq_v8i8_v2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a1, 48
-; RV64-NEXT: addiw a1, a1, 513
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vmv.v.x v8, a1
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vse8.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: buildvec_seq_v8i8_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 48
+; CHECK-NEXT: addi a1, a1, 513
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
store <8 x i8> <i8 1, i8 2, i8 3, i8 undef, i8 1, i8 2, i8 3, i8 undef>, ptr %x
ret void
}
@@ -823,21 +781,13 @@ define <4 x i8> @buildvec_not_vid_v4i8_1() {
}
define <4 x i8> @buildvec_not_vid_v4i8_2() {
-; RV32-LABEL: buildvec_not_vid_v4i8_2:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, 771
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: buildvec_not_vid_v4i8_2:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, 771
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: buildvec_not_vid_v4i8_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, 771
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: ret
ret <4 x i8> <i8 3, i8 3, i8 1, i8 0>
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
index 83e64651c5c6313..a54fa2e9b765fef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -395,79 +395,42 @@ define <64 x i16> @interleave_v32i16(<32 x i16> %x, <32 x i16> %y) {
}
define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
-; RV32-V128-LABEL: interleave_v32i32:
-; RV32-V128: # %bb.0:
-; RV32-V128-NEXT: addi sp, sp, -16
-; RV32-V128-NEXT: .cfi_def_cfa_offset 16
-; RV32-V128-NEXT: csrr a0, vlenb
-; RV32-V128-NEXT: slli a0, a0, 2
-; RV32-V128-NEXT: sub sp, sp, a0
-; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; RV32-V128-NEXT: lui a0, %hi(.LCPI17_0)
-; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI17_0)
-; RV32-V128-NEXT: li a1, 32
-; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu
-; RV32-V128-NEXT: vle16.v v4, (a0)
-; RV32-V128-NEXT: lui a0, %hi(.LCPI17_1)
-; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI17_1)
-; RV32-V128-NEXT: vle16.v v24, (a0)
-; RV32-V128-NEXT: addi a0, sp, 16
-; RV32-V128-NEXT: vs4r.v v24, (a0) # Unknown-size Folded Spill
-; RV32-V128-NEXT: lui a0, 699051
-; RV32-V128-NEXT: addi a0, a0, -1366
-; RV32-V128-NEXT: vmv.s.x v0, a0
-; RV32-V128-NEXT: vrgatherei16.vv v24, v8, v4
-; RV32-V128-NEXT: addi a0, sp, 16
-; RV32-V128-NEXT: vl4r.v v12, (a0) # Unknown-size Folded Reload
-; RV32-V128-NEXT: vrgatherei16.vv v24, v16, v12, v0.t
-; RV32-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-V128-NEXT: vwaddu.vv v0, v8, v16
-; RV32-V128-NEXT: li a0, -1
-; RV32-V128-NEXT: vwmaccu.vx v0, a0, v16
-; RV32-V128-NEXT: vmv8r.v v8, v0
-; RV32-V128-NEXT: vmv8r.v v16, v24
-; RV32-V128-NEXT: csrr a0, vlenb
-; RV32-V128-NEXT: slli a0, a0, 2
-; RV32-V128-NEXT: add sp, sp, a0
-; RV32-V128-NEXT: addi sp, sp, 16
-; RV32-V128-NEXT: ret
-;
-; RV64-V128-LABEL: interleave_v32i32:
-; RV64-V128: # %bb.0:
-; RV64-V128-NEXT: addi sp, sp, -16
-; RV64-V128-NEXT: .cfi_def_cfa_offset 16
-; RV64-V128-NEXT: csrr a0, vlenb
-; RV64-V128-NEXT: slli a0, a0, 2
-; RV64-V128-NEXT: sub sp, sp, a0
-; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; RV64-V128-NEXT: lui a0, %hi(.LCPI17_0)
-; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI17_0)
-; RV64-V128-NEXT: li a1, 32
-; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu
-; RV64-V128-NEXT: vle16.v v4, (a0)
-; RV64-V128-NEXT: lui a0, %hi(.LCPI17_1)
-; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI17_1)
-; RV64-V128-NEXT: vle16.v v24, (a0)
-; RV64-V128-NEXT: addi a0, sp, 16
-; RV64-V128-NEXT: vs4r.v v24, (a0) # Unknown-size Folded Spill
-; RV64-V128-NEXT: lui a0, 699051
-; RV64-V128-NEXT: addiw a0, a0, -1366
-; RV64-V128-NEXT: vmv.s.x v0, a0
-; RV64-V128-NEXT: vrgatherei16.vv v24, v8, v4
-; RV64-V128-NEXT: addi a0, sp, 16
-; RV64-V128-NEXT: vl4r.v v12, (a0) # Unknown-size Folded Reload
-; RV64-V128-NEXT: vrgatherei16.vv v24, v16, v12, v0.t
-; RV64-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV64-V128-NEXT: vwaddu.vv v0, v8, v16
-; RV64-V128-NEXT: li a0, -1
-; RV64-V128-NEXT: vwmaccu.vx v0, a0, v16
-; RV64-V128-NEXT: vmv8r.v v8, v0
-; RV64-V128-NEXT: vmv8r.v v16, v24
-; RV64-V128-NEXT: csrr a0, vlenb
-; RV64-V128-NEXT: slli a0, a0, 2
-; RV64-V128-NEXT: add sp, sp, a0
-; RV64-V128-NEXT: addi sp, sp, 16
-; RV64-V128-NEXT: ret
+; V128-LABEL: interleave_v32i32:
+; V128: # %bb.0:
+; V128-NEXT: addi sp, sp, -16
+; V128-NEXT: .cfi_def_cfa_offset 16
+; V128-NEXT: csrr a0, vlenb
+; V128-NEXT: slli a0, a0, 2
+; V128-NEXT: sub sp, sp, a0
+; V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; V128-NEXT: lui a0, %hi(.LCPI17_0)
+; V128-NEXT: addi a0, a0, %lo(.LCPI17_0)
+; V128-NEXT: li a1, 32
+; V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu
+; V128-NEXT: vle16.v v4, (a0)
+; V128-NEXT: lui a0, %hi(.LCPI17_1)
+; V128-NEXT: addi a0, a0, %lo(.LCPI17_1)
+; V128-NEXT: vle16.v v24, (a0)
+; V128-NEXT: addi a0, sp, 16
+; V128-NEXT: vs4r.v v24, (a0) # Unknown-size Folded Spill
+; V128-NEXT: lui a0, 699051
+; V128-NEXT: addi a0, a0, -1366
+; V128-NEXT: vmv.s.x v0, a0
+; V128-NEXT: vrgatherei16.vv v24, v8, v4
+; V128-NEXT: addi a0, sp, 16
+; V128-NEXT: vl4r.v v12, (a0) # Unknown-size Folded Reload
+; V128-NEXT: vrgatherei16.vv v24, v16, v12, v0.t
+; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; V128-NEXT: vwaddu.vv v0, v8, v16
+; V128-NEXT: li a0, -1
+; V128-NEXT: vwmaccu.vx v0, a0, v16
+; V128-NEXT: vmv8r.v v8, v0
+; V128-NEXT: vmv8r.v v16, v24
+; V128-NEXT: csrr a0, vlenb
+; V128-NEXT: slli a0, a0, 2
+; V128-NEXT: add sp, sp, a0
+; V128-NEXT: addi sp, sp, 16
+; V128-NEXT: ret
;
; V512-LABEL: interleave_v32i32:
; V512: # %bb.0:
@@ -509,49 +472,27 @@ define <4 x i8> @unary_interleave_v4i8(<4 x i8> %x) {
; This shouldn't be interleaved
define <4 x i8> @unary_interleave_v4i8_invalid(<4 x i8> %x) {
-; RV32-V128-LABEL: unary_interleave_v4i8_invalid:
-; RV32-V128: # %bb.0:
-; RV32-V128-NEXT: lui a0, 16
-; RV32-V128-NEXT: addi a0, a0, 768
-; RV32-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-V128-NEXT: vmv.s.x v10, a0
-; RV32-V128-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; RV32-V128-NEXT: vrgather.vv v9, v8, v10
-; RV32-V128-NEXT: vmv1r.v v8, v9
-; RV32-V128-NEXT: ret
-;
-; RV64-V128-LABEL: unary_interleave_v4i8_invalid:
-; RV64-V128: # %bb.0:
-; RV64-V128-NEXT: lui a0, 16
-; RV64-V128-NEXT: addiw a0, a0, 768
-; RV64-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-V128-NEXT: vmv.s.x v10, a0
-; RV64-V128-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; RV64-V128-NEXT: vrgather.vv v9, v8, v10
-; RV64-V128-NEXT: vmv1r.v v8, v9
-; RV64-V128-NEXT: ret
-;
-; RV32-V512-LABEL: unary_interleave_v4i8_invalid:
-; RV32-V512: # %bb.0:
-; RV32-V512-NEXT: lui a0, 16
-; RV32-V512-NEXT: addi a0, a0, 768
-; RV32-V512-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
-; RV32-V512-NEXT: vmv.s.x v10, a0
-; RV32-V512-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-V512-NEXT: vrgather.vv v9, v8, v10
-; RV32-V512-NEXT: vmv1r.v v8, v9
-; RV32-V512-NEXT: ret
+; V128-LABEL: unary_interleave_v4i8_invalid:
+; V128: # %bb.0:
+; V128-NEXT: lui a0, 16
+; V128-NEXT: addi a0, a0, 768
+; V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; V128-NEXT: vmv.s.x v10, a0
+; V128-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; V128-NEXT: vrgather.vv v9, v8, v10
+; V128-NEXT: vmv1r.v v8, v9
+; V128-NEXT: ret
;
-; RV64-V512-LABEL: unary_interleave_v4i8_invalid:
-; RV64-V512: # %bb.0:
-; RV64-V512-NEXT: lui a0, 16
-; RV64-V512-NEXT: addiw a0, a0, 768
-; RV64-V512-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
-; RV64-V512-NEXT: vmv.s.x v10, a0
-; RV64-V512-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-V512-NEXT: vrgather.vv v9, v8, v10
-; RV64-V512-NEXT: vmv1r.v v8, v9
-; RV64-V512-NEXT: ret
+; V512-LABEL: unary_interleave_v4i8_invalid:
+; V512: # %bb.0:
+; V512-NEXT: lui a0, 16
+; V512-NEXT: addi a0, a0, 768
+; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; V512-NEXT: vmv.s.x v10, a0
+; V512-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; V512-NEXT: vrgather.vv v9, v8, v10
+; V512-NEXT: vmv1r.v v8, v9
+; V512-NEXT: ret
%a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 4>
ret <4 x i8> %a
}
@@ -608,31 +549,18 @@ define <4 x i32> @unary_interleave_v4i32(<4 x i32> %x) {
; FIXME: Is there better codegen we can do here?
define <4 x i64> @unary_interleave_v4i64(<4 x i64> %x) {
-; RV32-V128-LABEL: unary_interleave_v4i64:
-; RV32-V128: # %bb.0:
-; RV32-V128-NEXT: lui a0, 12304
-; RV32-V128-NEXT: addi a0, a0, 512
-; RV32-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-V128-NEXT: vmv.s.x v10, a0
-; RV32-V128-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-V128-NEXT: vsext.vf2 v12, v10
-; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v12
-; RV32-V128-NEXT: vmv.v.v v8, v10
-; RV32-V128-NEXT: ret
-;
-; RV64-V128-LABEL: unary_interleave_v4i64:
-; RV64-V128: # %bb.0:
-; RV64-V128-NEXT: lui a0, 12304
-; RV64-V128-NEXT: addiw a0, a0, 512
-; RV64-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-V128-NEXT: vmv.s.x v10, a0
-; RV64-V128-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-V128-NEXT: vsext.vf2 v12, v10
-; RV64-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-V128-NEXT: vrgatherei16.vv v10, v8, v12
-; RV64-V128-NEXT: vmv.v.v v8, v10
-; RV64-V128-NEXT: ret
+; V128-LABEL: unary_interleave_v4i64:
+; V128: # %bb.0:
+; V128-NEXT: lui a0, 12304
+; V128-NEXT: addi a0, a0, 512
+; V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; V128-NEXT: vmv.s.x v10, a0
+; V128-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; V128-NEXT: vsext.vf2 v12, v10
+; V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; V128-NEXT: vrgatherei16.vv v10, v8, v12
+; V128-NEXT: vmv.v.v v8, v10
+; V128-NEXT: ret
;
; RV32-V512-LABEL: unary_interleave_v4i64:
; RV32-V512: # %bb.0:
@@ -650,7 +578,7 @@ define <4 x i64> @unary_interleave_v4i64(<4 x i64> %x) {
; RV64-V512-LABEL: unary_interleave_v4i64:
; RV64-V512: # %bb.0:
; RV64-V512-NEXT: lui a0, 12304
-; RV64-V512-NEXT: addiw a0, a0, 512
+; RV64-V512-NEXT: addi a0, a0, 512
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; RV64-V512-NEXT: vmv.s.x v9, a0
; RV64-V512-NEXT: vsext.vf8 v10, v9
@@ -750,3 +678,6 @@ define <4 x i8> @unary_interleave_10uu_v4i8(<4 x i8> %x) {
ret <4 x i8> %a
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32-V128: {{.*}}
+; RV64-V128: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 927fd3e203355c0..a56a81f5f793bc2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -51,57 +51,33 @@ define <4 x i16> @shuffle_vx_v4i16(<4 x i16> %x) {
}
define <4 x i16> @vrgather_permute_shuffle_vu_v4i16(<4 x i16> %x) {
-; RV32-LABEL: vrgather_permute_shuffle_vu_v4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 4096
-; RV32-NEXT: addi a0, a0, 513
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT: vsext.vf2 v10, v9
-; RV32-NEXT: vrgather.vv v9, v8, v10
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vrgather_permute_shuffle_vu_v4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 4096
-; RV64-NEXT: addiw a0, a0, 513
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT: vsext.vf2 v10, v9
-; RV64-NEXT: vrgather.vv v9, v8, v10
-; RV64-NEXT: vmv1r.v v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vrgather_permute_shuffle_vu_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 4096
+; CHECK-NEXT: addi a0, a0, 513
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsext.vf2 v10, v9
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
%s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
ret <4 x i16> %s
}
define <4 x i16> @vrgather_permute_shuffle_uv_v4i16(<4 x i16> %x) {
-; RV32-LABEL: vrgather_permute_shuffle_uv_v4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 4096
-; RV32-NEXT: addi a0, a0, 513
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT: vsext.vf2 v10, v9
-; RV32-NEXT: vrgather.vv v9, v8, v10
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vrgather_permute_shuffle_uv_v4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 4096
-; RV64-NEXT: addiw a0, a0, 513
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT: vsext.vf2 v10, v9
-; RV64-NEXT: vrgather.vv v9, v8, v10
-; RV64-NEXT: vmv1r.v v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vrgather_permute_shuffle_uv_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 4096
+; CHECK-NEXT: addi a0, a0, 513
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsext.vf2 v10, v9
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
%s = shufflevector <4 x i16> poison, <4 x i16> %x, <4 x i32> <i32 5, i32 6, i32 4, i32 5>
ret <4 x i16> %s
}
@@ -439,33 +415,19 @@ define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) {
}
define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) {
-; RV32-LABEL: splat_ve2_we0_ins_i2ve4:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 8256
-; RV32-NEXT: addi a0, a0, 514
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v11, a0
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; RV32-NEXT: li a0, 66
-; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vrgather.vv v10, v8, v11
-; RV32-NEXT: vrgather.vi v10, v9, 0, v0.t
-; RV32-NEXT: vmv1r.v v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: splat_ve2_we0_ins_i2ve4:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 8256
-; RV64-NEXT: addiw a0, a0, 514
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vmv.v.x v11, a0
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; RV64-NEXT: li a0, 66
-; RV64-NEXT: vmv.s.x v0, a0
-; RV64-NEXT: vrgather.vv v10, v8, v11
-; RV64-NEXT: vrgather.vi v10, v9, 0, v0.t
-; RV64-NEXT: vmv1r.v v8, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: splat_ve2_we0_ins_i2ve4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8256
+; CHECK-NEXT: addi a0, a0, 514
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v11, a0
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: li a0, 66
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vrgather.vv v10, v8, v11
+; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
%shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 2, i32 2, i32 2, i32 8, i32 2>
ret <8 x i8> %shuff
}
@@ -490,43 +452,24 @@ define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) {
}
define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) {
-; RV32-LABEL: splat_ve2_we0_ins_i2ve4_i5we6:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vmv.v.i v10, 6
-; RV32-NEXT: vmv.v.i v11, 0
-; RV32-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
-; RV32-NEXT: vslideup.vi v11, v10, 5
-; RV32-NEXT: lui a0, 8256
-; RV32-NEXT: addi a0, a0, 2
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v12, a0
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; RV32-NEXT: li a0, 98
-; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vrgather.vv v10, v8, v12
-; RV32-NEXT: vrgather.vv v10, v9, v11, v0.t
-; RV32-NEXT: vmv1r.v v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: splat_ve2_we0_ins_i2ve4_i5we6:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vmv.v.i v10, 6
-; RV64-NEXT: vmv.v.i v11, 0
-; RV64-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
-; RV64-NEXT: vslideup.vi v11, v10, 5
-; RV64-NEXT: lui a0, 8256
-; RV64-NEXT: addiw a0, a0, 2
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vmv.v.x v12, a0
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; RV64-NEXT: li a0, 98
-; RV64-NEXT: vmv.s.x v0, a0
-; RV64-NEXT: vrgather.vv v10, v8, v12
-; RV64-NEXT: vrgather.vv v10, v9, v11, v0.t
-; RV64-NEXT: vmv1r.v v8, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: splat_ve2_we0_ins_i2ve4_i5we6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 6
+; CHECK-NEXT: vmv.v.i v11, 0
+; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v11, v10, 5
+; CHECK-NEXT: lui a0, 8256
+; CHECK-NEXT: addi a0, a0, 2
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: li a0, 98
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
%shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 2, i32 2, i32 14, i32 8, i32 2>
ret <8 x i8> %shuff
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index b2a9813e50a1868..e6868abdb5b1d71 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1099,99 +1099,52 @@ define void @urem_v2i64(ptr %x, ptr %y) {
}
define void @mulhu_v16i8(ptr %x) {
-; RV32-LABEL: mulhu_v16i8:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vle8.v v8, (a0)
-; RV32-NEXT: lui a1, 3
-; RV32-NEXT: addi a1, a1, -2044
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vmv.s.x v0, a1
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vmv.v.i v9, 0
-; RV32-NEXT: li a1, -128
-; RV32-NEXT: vmerge.vxm v10, v9, a1, v0
-; RV32-NEXT: lui a1, 1
-; RV32-NEXT: addi a2, a1, 32
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vmv.s.x v0, a2
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: lui a2, %hi(.LCPI65_0)
-; RV32-NEXT: addi a2, a2, %lo(.LCPI65_0)
-; RV32-NEXT: vle8.v v11, (a2)
-; RV32-NEXT: vmerge.vim v9, v9, 1, v0
-; RV32-NEXT: vsrl.vv v9, v8, v9
-; RV32-NEXT: vmulhu.vv v9, v9, v11
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: vmulhu.vv v8, v8, v10
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: li a2, 513
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vmv.s.x v0, a2
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vmv.v.i v9, 4
-; RV32-NEXT: vmerge.vim v9, v9, 1, v0
-; RV32-NEXT: addi a1, a1, 78
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vmv.s.x v0, a1
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vmerge.vim v9, v9, 3, v0
-; RV32-NEXT: lui a1, 8
-; RV32-NEXT: addi a1, a1, 304
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vmv.s.x v0, a1
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vmerge.vim v9, v9, 2, v0
-; RV32-NEXT: vsrl.vv v8, v8, v9
-; RV32-NEXT: vse8.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: mulhu_v16i8:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vle8.v v8, (a0)
-; RV64-NEXT: lui a1, 3
-; RV64-NEXT: addiw a1, a1, -2044
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vmv.s.x v0, a1
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vmv.v.i v9, 0
-; RV64-NEXT: li a1, -128
-; RV64-NEXT: vmerge.vxm v10, v9, a1, v0
-; RV64-NEXT: lui a1, 1
-; RV64-NEXT: addiw a2, a1, 32
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vmv.s.x v0, a2
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: lui a2, %hi(.LCPI65_0)
-; RV64-NEXT: addi a2, a2, %lo(.LCPI65_0)
-; RV64-NEXT: vle8.v v11, (a2)
-; RV64-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64-NEXT: vsrl.vv v9, v8, v9
-; RV64-NEXT: vmulhu.vv v9, v9, v11
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: vmulhu.vv v8, v8, v10
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: li a2, 513
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vmv.s.x v0, a2
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vmv.v.i v9, 4
-; RV64-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64-NEXT: addiw a1, a1, 78
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vmv.s.x v0, a1
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vmerge.vim v9, v9, 3, v0
-; RV64-NEXT: lui a1, 8
-; RV64-NEXT: addiw a1, a1, 304
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vmv.s.x v0, a1
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vmerge.vim v9, v9, 2, v0
-; RV64-NEXT: vsrl.vv v8, v8, v9
-; RV64-NEXT: vse8.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: mulhu_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: lui a1, 3
+; CHECK-NEXT: addi a1, a1, -2044
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a1
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: li a1, -128
+; CHECK-NEXT: vmerge.vxm v10, v9, a1, v0
+; CHECK-NEXT: lui a1, 1
+; CHECK-NEXT: addi a2, a1, 32
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a2
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: lui a2, %hi(.LCPI65_0)
+; CHECK-NEXT: addi a2, a2, %lo(.LCPI65_0)
+; CHECK-NEXT: vle8.v v11, (a2)
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vsrl.vv v9, v8, v9
+; CHECK-NEXT: vmulhu.vv v9, v9, v11
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: vmulhu.vv v8, v8, v10
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: li a2, 513
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a2
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 4
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: addi a1, a1, 78
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a1
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vmerge.vim v9, v9, 3, v0
+; CHECK-NEXT: lui a1, 8
+; CHECK-NEXT: addi a1, a1, 304
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a1
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vmerge.vim v9, v9, 2, v0
+; CHECK-NEXT: vsrl.vv v8, v8, v9
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = udiv <16 x i8> %a, <i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25>
store <16 x i8> %b, ptr %x
@@ -1238,53 +1191,29 @@ define void @mulhu_v8i16(ptr %x) {
}
define void @mulhu_v6i16(ptr %x) {
-; RV32-LABEL: mulhu_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT: vid.v v9
-; RV32-NEXT: vadd.vi v9, v9, 12
-; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 4
-; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT: vdivu.vv v9, v10, v9
-; RV32-NEXT: lui a1, 45217
-; RV32-NEXT: addi a1, a1, -1785
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v10, a1
-; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT: vsext.vf2 v11, v10
-; RV32-NEXT: vdivu.vv v8, v8, v11
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vslideup.vi v8, v9, 4
-; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: mulhu_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT: vid.v v9
-; RV64-NEXT: vadd.vi v9, v9, 12
-; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 4
-; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT: vdivu.vv v9, v10, v9
-; RV64-NEXT: lui a1, 45217
-; RV64-NEXT: addiw a1, a1, -1785
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, a1
-; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT: vsext.vf2 v11, v10
-; RV64-NEXT: vdivu.vv v8, v8, v11
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vslideup.vi v8, v9, 4
-; RV64-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV64-NEXT: vse16.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: mulhu_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vadd.vi v9, v9, 12
+; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vdivu.vv v9, v10, v9
+; CHECK-NEXT: lui a1, 45217
+; CHECK-NEXT: addi a1, a1, -1785
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a1
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsext.vf2 v11, v10
+; CHECK-NEXT: vdivu.vv v8, v8, v11
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 4
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = udiv <6 x i16> %a, <i16 7, i16 9, i16 10, i16 11, i16 12, i16 13>
store <6 x i16> %b, ptr %x
@@ -1292,55 +1221,30 @@ define void @mulhu_v6i16(ptr %x) {
}
define void @mulhu_v4i32(ptr %x) {
-; RV32-LABEL: mulhu_v4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: lui a1, 524288
-; RV32-NEXT: vmv.s.x v9, a1
-; RV32-NEXT: vmv.v.i v10, 0
-; RV32-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV32-NEXT: vslideup.vi v10, v9, 2
-; RV32-NEXT: lui a1, %hi(.LCPI68_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI68_0)
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vle32.v v9, (a1)
-; RV32-NEXT: vmulhu.vv v9, v8, v9
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: vmulhu.vv v8, v8, v10
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a1, 4128
-; RV32-NEXT: addi a1, a1, 514
-; RV32-NEXT: vmv.s.x v9, a1
-; RV32-NEXT: vsext.vf4 v10, v9
-; RV32-NEXT: vsrl.vv v8, v8, v10
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: mulhu_v4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: lui a1, 524288
-; RV64-NEXT: vmv.s.x v9, a1
-; RV64-NEXT: vmv.v.i v10, 0
-; RV64-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV64-NEXT: vslideup.vi v10, v9, 2
-; RV64-NEXT: lui a1, %hi(.LCPI68_0)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI68_0)
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vle32.v v9, (a1)
-; RV64-NEXT: vmulhu.vv v9, v8, v9
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: vmulhu.vv v8, v8, v10
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a1, 4128
-; RV64-NEXT: addiw a1, a1, 514
-; RV64-NEXT: vmv.s.x v9, a1
-; RV64-NEXT: vsext.vf4 v10, v9
-; RV64-NEXT: vsrl.vv v8, v8, v10
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: mulhu_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: lui a1, 524288
+; CHECK-NEXT: vmv.s.x v9, a1
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT: vslideup.vi v10, v9, 2
+; CHECK-NEXT: lui a1, %hi(.LCPI68_0)
+; CHECK-NEXT: addi a1, a1, %lo(.LCPI68_0)
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v9, (a1)
+; CHECK-NEXT: vmulhu.vv v9, v8, v9
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: vmulhu.vv v8, v8, v10
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: lui a1, 4128
+; CHECK-NEXT: addi a1, a1, 514
+; CHECK-NEXT: vmv.s.x v9, a1
+; CHECK-NEXT: vsext.vf4 v10, v9
+; CHECK-NEXT: vsrl.vv v8, v8, v10
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = udiv <4 x i32> %a, <i32 5, i32 6, i32 7, i32 9>
store <4 x i32> %b, ptr %x
@@ -1397,45 +1301,25 @@ define void @mulhu_v2i64(ptr %x) {
}
define void @mulhs_v16i8(ptr %x) {
-; RV32-LABEL: mulhs_v16i8:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vle8.v v8, (a0)
-; RV32-NEXT: li a1, -123
-; RV32-NEXT: vmv.v.x v9, a1
-; RV32-NEXT: lui a1, 5
-; RV32-NEXT: addi a1, a1, -1452
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vmv.s.x v0, a1
-; RV32-NEXT: li a1, 57
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vmerge.vxm v9, v9, a1, v0
-; RV32-NEXT: vmulhu.vv v8, v8, v9
-; RV32-NEXT: vmv.v.i v9, 7
-; RV32-NEXT: vmerge.vim v9, v9, 1, v0
-; RV32-NEXT: vsrl.vv v8, v8, v9
-; RV32-NEXT: vse8.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: mulhs_v16i8:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vle8.v v8, (a0)
-; RV64-NEXT: li a1, -123
-; RV64-NEXT: vmv.v.x v9, a1
-; RV64-NEXT: lui a1, 5
-; RV64-NEXT: addiw a1, a1, -1452
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vmv.s.x v0, a1
-; RV64-NEXT: li a1, 57
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vmerge.vxm v9, v9, a1, v0
-; RV64-NEXT: vmulhu.vv v8, v8, v9
-; RV64-NEXT: vmv.v.i v9, 7
-; RV64-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64-NEXT: vsrl.vv v8, v8, v9
-; RV64-NEXT: vse8.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: mulhs_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: li a1, -123
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: lui a1, 5
+; CHECK-NEXT: addi a1, a1, -1452
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a1
+; CHECK-NEXT: li a1, 57
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vmerge.vxm v9, v9, a1, v0
+; CHECK-NEXT: vmulhu.vv v8, v8, v9
+; CHECK-NEXT: vmv.v.i v9, 7
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vsrl.vv v8, v8, v9
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = udiv <16 x i8> %a, <i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9>
store <16 x i8> %b, ptr %x
@@ -1443,43 +1327,24 @@ define void @mulhs_v16i8(ptr %x) {
}
define void @mulhs_v8i16(ptr %x) {
-; RV32-LABEL: mulhs_v8i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: lui a1, 5
-; RV32-NEXT: addi a1, a1, -1755
-; RV32-NEXT: vmv.v.x v9, a1
-; RV32-NEXT: li a1, 105
-; RV32-NEXT: vmv.s.x v0, a1
-; RV32-NEXT: lui a1, 1048571
-; RV32-NEXT: addi a1, a1, 1755
-; RV32-NEXT: vmerge.vxm v9, v9, a1, v0
-; RV32-NEXT: vmulh.vv v8, v8, v9
-; RV32-NEXT: vsra.vi v8, v8, 1
-; RV32-NEXT: vsrl.vi v9, v8, 15
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: mulhs_v8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: lui a1, 5
-; RV64-NEXT: addiw a1, a1, -1755
-; RV64-NEXT: vmv.v.x v9, a1
-; RV64-NEXT: li a1, 105
-; RV64-NEXT: vmv.s.x v0, a1
-; RV64-NEXT: lui a1, 1048571
-; RV64-NEXT: addiw a1, a1, 1755
-; RV64-NEXT: vmerge.vxm v9, v9, a1, v0
-; RV64-NEXT: vmulh.vv v8, v8, v9
-; RV64-NEXT: vsra.vi v8, v8, 1
-; RV64-NEXT: vsrl.vi v9, v8, 15
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: vse16.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: mulhs_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: lui a1, 5
+; CHECK-NEXT: addi a1, a1, -1755
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: li a1, 105
+; CHECK-NEXT: vmv.s.x v0, a1
+; CHECK-NEXT: lui a1, 1048571
+; CHECK-NEXT: addi a1, a1, 1755
+; CHECK-NEXT: vmerge.vxm v9, v9, a1, v0
+; CHECK-NEXT: vmulh.vv v8, v8, v9
+; CHECK-NEXT: vsra.vi v8, v8, 1
+; CHECK-NEXT: vsrl.vi v9, v8, 15
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = sdiv <8 x i16> %a, <i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7, i16 -7, i16 7>
store <8 x i16> %b, ptr %x
@@ -1487,57 +1352,31 @@ define void @mulhs_v8i16(ptr %x) {
}
define void @mulhs_v6i16(ptr %x) {
-; RV32-LABEL: mulhs_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT: vmv.v.i v9, 7
-; RV32-NEXT: vid.v v10
-; RV32-NEXT: li a1, -14
-; RV32-NEXT: vmadd.vx v10, a1, v9
-; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 4
-; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT: vdiv.vv v9, v9, v10
-; RV32-NEXT: lui a1, 1020016
-; RV32-NEXT: addi a1, a1, 2041
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v10, a1
-; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT: vsext.vf2 v11, v10
-; RV32-NEXT: vdiv.vv v8, v8, v11
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vslideup.vi v8, v9, 4
-; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: mulhs_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT: vmv.v.i v9, 7
-; RV64-NEXT: vid.v v10
-; RV64-NEXT: li a1, -14
-; RV64-NEXT: vmadd.vx v10, a1, v9
-; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 4
-; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT: vdiv.vv v9, v9, v10
-; RV64-NEXT: lui a1, 1020016
-; RV64-NEXT: addiw a1, a1, 2041
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, a1
-; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT: vsext.vf2 v11, v10
-; RV64-NEXT: vdiv.vv v8, v8, v11
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vslideup.vi v8, v9, 4
-; RV64-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV64-NEXT: vse16.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: mulhs_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 7
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: li a1, -14
+; CHECK-NEXT: vmadd.vx v10, a1, v9
+; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v9, v8, 4
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vdiv.vv v9, v9, v10
+; CHECK-NEXT: lui a1, 1020016
+; CHECK-NEXT: addi a1, a1, 2041
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a1
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsext.vf2 v11, v10
+; CHECK-NEXT: vdiv.vv v8, v8, v11
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 4
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = sdiv <6 x i16> %a, <i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7>
store <6 x i16> %b, ptr %x
@@ -5027,105 +4866,55 @@ define void @extract_v4i64(ptr %x, ptr %y) {
}
define void @mulhu_v32i8(ptr %x) {
-; LMULMAX2-RV32-LABEL: mulhu_v32i8:
-; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: li a1, 32
-; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vle8.v v8, (a0)
-; LMULMAX2-RV32-NEXT: vmv.v.i v10, 0
-; LMULMAX2-RV32-NEXT: lui a2, 163907
-; LMULMAX2-RV32-NEXT: addi a2, a2, -2044
-; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2
-; LMULMAX2-RV32-NEXT: li a2, -128
-; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v10, a2, v0
-; LMULMAX2-RV32-NEXT: lui a2, 66049
-; LMULMAX2-RV32-NEXT: addi a2, a2, 32
-; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2
-; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV32-NEXT: lui a2, %hi(.LCPI181_0)
-; LMULMAX2-RV32-NEXT: addi a2, a2, %lo(.LCPI181_0)
-; LMULMAX2-RV32-NEXT: vle8.v v14, (a2)
-; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0
-; LMULMAX2-RV32-NEXT: vsrl.vv v10, v8, v10
-; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v10, v14
-; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vmv.v.i v10, 4
-; LMULMAX2-RV32-NEXT: lui a2, 8208
-; LMULMAX2-RV32-NEXT: addi a2, a2, 513
-; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2
-; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0
-; LMULMAX2-RV32-NEXT: lui a2, 66785
-; LMULMAX2-RV32-NEXT: addi a2, a2, 78
-; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2
-; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 3, v0
-; LMULMAX2-RV32-NEXT: lui a2, 529160
-; LMULMAX2-RV32-NEXT: addi a2, a2, 304
-; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2
-; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 2, v0
-; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vse8.v v8, (a0)
-; LMULMAX2-RV32-NEXT: ret
-;
-; LMULMAX2-RV64-LABEL: mulhu_v32i8:
-; LMULMAX2-RV64: # %bb.0:
-; LMULMAX2-RV64-NEXT: li a1, 32
-; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vle8.v v8, (a0)
-; LMULMAX2-RV64-NEXT: vmv.v.i v10, 0
-; LMULMAX2-RV64-NEXT: lui a2, 163907
-; LMULMAX2-RV64-NEXT: addiw a2, a2, -2044
-; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2
-; LMULMAX2-RV64-NEXT: li a2, -128
-; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v10, a2, v0
-; LMULMAX2-RV64-NEXT: lui a2, 66049
-; LMULMAX2-RV64-NEXT: addiw a2, a2, 32
-; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2
-; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI181_0)
-; LMULMAX2-RV64-NEXT: addi a2, a2, %lo(.LCPI181_0)
-; LMULMAX2-RV64-NEXT: vle8.v v14, (a2)
-; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 1, v0
-; LMULMAX2-RV64-NEXT: vsrl.vv v10, v8, v10
-; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v10, v14
-; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vmv.v.i v10, 4
-; LMULMAX2-RV64-NEXT: lui a2, 8208
-; LMULMAX2-RV64-NEXT: addiw a2, a2, 513
-; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2
-; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 1, v0
-; LMULMAX2-RV64-NEXT: lui a2, 66785
-; LMULMAX2-RV64-NEXT: addiw a2, a2, 78
-; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2
-; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 3, v0
-; LMULMAX2-RV64-NEXT: lui a2, 529160
-; LMULMAX2-RV64-NEXT: addiw a2, a2, 304
-; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2
-; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 2, v0
-; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vse8.v v8, (a0)
-; LMULMAX2-RV64-NEXT: ret
+; LMULMAX2-LABEL: mulhu_v32i8:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: li a1, 32
+; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT: vle8.v v8, (a0)
+; LMULMAX2-NEXT: vmv.v.i v10, 0
+; LMULMAX2-NEXT: lui a2, 163907
+; LMULMAX2-NEXT: addi a2, a2, -2044
+; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; LMULMAX2-NEXT: vmv.s.x v0, a2
+; LMULMAX2-NEXT: li a2, -128
+; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT: vmerge.vxm v12, v10, a2, v0
+; LMULMAX2-NEXT: lui a2, 66049
+; LMULMAX2-NEXT: addi a2, a2, 32
+; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; LMULMAX2-NEXT: vmv.s.x v0, a2
+; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT: lui a2, %hi(.LCPI181_0)
+; LMULMAX2-NEXT: addi a2, a2, %lo(.LCPI181_0)
+; LMULMAX2-NEXT: vle8.v v14, (a2)
+; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0
+; LMULMAX2-NEXT: vsrl.vv v10, v8, v10
+; LMULMAX2-NEXT: vmulhu.vv v10, v10, v14
+; LMULMAX2-NEXT: vsub.vv v8, v8, v10
+; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12
+; LMULMAX2-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-NEXT: vmv.v.i v10, 4
+; LMULMAX2-NEXT: lui a2, 8208
+; LMULMAX2-NEXT: addi a2, a2, 513
+; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; LMULMAX2-NEXT: vmv.s.x v0, a2
+; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0
+; LMULMAX2-NEXT: lui a2, 66785
+; LMULMAX2-NEXT: addi a2, a2, 78
+; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; LMULMAX2-NEXT: vmv.s.x v0, a2
+; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT: vmerge.vim v10, v10, 3, v0
+; LMULMAX2-NEXT: lui a2, 529160
+; LMULMAX2-NEXT: addi a2, a2, 304
+; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; LMULMAX2-NEXT: vmv.s.x v0, a2
+; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT: vmerge.vim v10, v10, 2, v0
+; LMULMAX2-NEXT: vsrl.vv v8, v8, v10
+; LMULMAX2-NEXT: vse8.v v8, (a0)
+; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: mulhu_v32i8:
; LMULMAX1: # %bb.0:
@@ -5242,57 +5031,31 @@ define void @mulhu_v16i16(ptr %x) {
}
define void @mulhu_v8i32(ptr %x) {
-; LMULMAX2-RV32-LABEL: mulhu_v8i32:
-; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vle32.v v8, (a0)
-; LMULMAX2-RV32-NEXT: li a1, 68
-; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1
-; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI183_0)
-; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI183_0)
-; LMULMAX2-RV32-NEXT: vle32.v v10, (a1)
-; LMULMAX2-RV32-NEXT: vmv.v.i v12, 0
-; LMULMAX2-RV32-NEXT: lui a1, 524288
-; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v12, a1, v0
-; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v8, v10
-; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: lui a1, 4128
-; LMULMAX2-RV32-NEXT: addi a1, a1, 514
-; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1
-; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vsext.vf4 v12, v10
-; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT: vse32.v v8, (a0)
-; LMULMAX2-RV32-NEXT: ret
-;
-; LMULMAX2-RV64-LABEL: mulhu_v8i32:
-; LMULMAX2-RV64: # %bb.0:
-; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vle32.v v8, (a0)
-; LMULMAX2-RV64-NEXT: li a1, 68
-; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1
-; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI183_0)
-; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI183_0)
-; LMULMAX2-RV64-NEXT: vle32.v v10, (a1)
-; LMULMAX2-RV64-NEXT: vmv.v.i v12, 0
-; LMULMAX2-RV64-NEXT: lui a1, 524288
-; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v12, a1, v0
-; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v8, v10
-; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: lui a1, 4128
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 514
-; LMULMAX2-RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX2-RV64-NEXT: vmv.v.x v10, a1
-; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vsext.vf4 v12, v10
-; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v12
-; LMULMAX2-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX2-RV64-NEXT: ret
+; LMULMAX2-LABEL: mulhu_v8i32:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-NEXT: vle32.v v8, (a0)
+; LMULMAX2-NEXT: li a1, 68
+; LMULMAX2-NEXT: vmv.s.x v0, a1
+; LMULMAX2-NEXT: lui a1, %hi(.LCPI183_0)
+; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI183_0)
+; LMULMAX2-NEXT: vle32.v v10, (a1)
+; LMULMAX2-NEXT: vmv.v.i v12, 0
+; LMULMAX2-NEXT: lui a1, 524288
+; LMULMAX2-NEXT: vmerge.vxm v12, v12, a1, v0
+; LMULMAX2-NEXT: vmulhu.vv v10, v8, v10
+; LMULMAX2-NEXT: vsub.vv v8, v8, v10
+; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12
+; LMULMAX2-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-NEXT: lui a1, 4128
+; LMULMAX2-NEXT: addi a1, a1, 514
+; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; LMULMAX2-NEXT: vmv.v.x v10, a1
+; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-NEXT: vsext.vf4 v12, v10
+; LMULMAX2-NEXT: vsrl.vv v8, v8, v12
+; LMULMAX2-NEXT: vse32.v v8, (a0)
+; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: mulhu_v8i32:
; LMULMAX1-RV32: # %bb.0:
@@ -5334,7 +5097,7 @@ define void @mulhu_v8i32(ptr %x) {
; LMULMAX1-RV64-NEXT: addi a1, a0, 16
; LMULMAX1-RV64-NEXT: vle32.v v9, (a1)
; LMULMAX1-RV64-NEXT: lui a2, 36976
-; LMULMAX1-RV64-NEXT: addiw a2, a2, 1541
+; LMULMAX1-RV64-NEXT: addi a2, a2, 1541
; LMULMAX1-RV64-NEXT: vmv.s.x v10, a2
; LMULMAX1-RV64-NEXT: vsext.vf4 v11, v10
; LMULMAX1-RV64-NEXT: vdivu.vv v9, v9, v11
@@ -5398,7 +5161,7 @@ define void @mulhu_v4i64(ptr %x) {
; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12
; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10
; LMULMAX2-RV64-NEXT: lui a1, 12320
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 513
+; LMULMAX2-RV64-NEXT: addi a1, a1, 513
; LMULMAX2-RV64-NEXT: vmv.s.x v10, a1
; LMULMAX2-RV64-NEXT: vsext.vf8 v12, v10
; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v12
@@ -5481,85 +5244,45 @@ define void @mulhu_v4i64(ptr %x) {
}
define void @mulhs_v32i8(ptr %x) {
-; LMULMAX2-RV32-LABEL: mulhs_v32i8:
-; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: li a1, 32
-; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vle8.v v8, (a0)
-; LMULMAX2-RV32-NEXT: vmv.v.i v10, 7
-; LMULMAX2-RV32-NEXT: lui a2, 304453
-; LMULMAX2-RV32-NEXT: addi a2, a2, -1452
-; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2
-; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0
-; LMULMAX2-RV32-NEXT: li a1, -123
-; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1
-; LMULMAX2-RV32-NEXT: li a1, 57
-; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v12, a1, v0
-; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vse8.v v8, (a0)
-; LMULMAX2-RV32-NEXT: ret
-;
-; LMULMAX2-RV64-LABEL: mulhs_v32i8:
-; LMULMAX2-RV64: # %bb.0:
-; LMULMAX2-RV64-NEXT: li a1, 32
-; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vle8.v v8, (a0)
-; LMULMAX2-RV64-NEXT: vmv.v.i v10, 7
-; LMULMAX2-RV64-NEXT: lui a2, 304453
-; LMULMAX2-RV64-NEXT: addiw a2, a2, -1452
-; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2
-; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 1, v0
-; LMULMAX2-RV64-NEXT: li a1, -123
-; LMULMAX2-RV64-NEXT: vmv.v.x v12, a1
-; LMULMAX2-RV64-NEXT: li a1, 57
-; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v12, a1, v0
-; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12
-; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vse8.v v8, (a0)
-; LMULMAX2-RV64-NEXT: ret
-;
-; LMULMAX1-RV32-LABEL: mulhs_v32i8:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle8.v v9, (a1)
-; LMULMAX1-RV32-NEXT: lui a2, 5
-; LMULMAX1-RV32-NEXT: addi a2, a2, -1452
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.s.x v0, a2
-; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.i v10, -9
-; LMULMAX1-RV32-NEXT: vmerge.vim v10, v10, 9, v0
-; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vse8.v v9, (a1)
-; LMULMAX1-RV32-NEXT: ret
+; LMULMAX2-LABEL: mulhs_v32i8:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: li a1, 32
+; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT: vle8.v v8, (a0)
+; LMULMAX2-NEXT: vmv.v.i v10, 7
+; LMULMAX2-NEXT: lui a2, 304453
+; LMULMAX2-NEXT: addi a2, a2, -1452
+; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; LMULMAX2-NEXT: vmv.s.x v0, a2
+; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0
+; LMULMAX2-NEXT: li a1, -123
+; LMULMAX2-NEXT: vmv.v.x v12, a1
+; LMULMAX2-NEXT: li a1, 57
+; LMULMAX2-NEXT: vmerge.vxm v12, v12, a1, v0
+; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12
+; LMULMAX2-NEXT: vsrl.vv v8, v8, v10
+; LMULMAX2-NEXT: vse8.v v8, (a0)
+; LMULMAX2-NEXT: ret
;
-; LMULMAX1-RV64-LABEL: mulhs_v32i8:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle8.v v9, (a1)
-; LMULMAX1-RV64-NEXT: lui a2, 5
-; LMULMAX1-RV64-NEXT: addiw a2, a2, -1452
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; LMULMAX1-RV64-NEXT: vmv.s.x v0, a2
-; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vmv.v.i v10, -9
-; LMULMAX1-RV64-NEXT: vmerge.vim v10, v10, 9, v0
-; LMULMAX1-RV64-NEXT: vdivu.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vse8.v v9, (a1)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX1-LABEL: mulhs_v32i8:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; LMULMAX1-NEXT: vle8.v v8, (a0)
+; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: vle8.v v9, (a1)
+; LMULMAX1-NEXT: lui a2, 5
+; LMULMAX1-NEXT: addi a2, a2, -1452
+; LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; LMULMAX1-NEXT: vmv.s.x v0, a2
+; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; LMULMAX1-NEXT: vmv.v.i v10, -9
+; LMULMAX1-NEXT: vmerge.vim v10, v10, 9, v0
+; LMULMAX1-NEXT: vdivu.vv v9, v9, v10
+; LMULMAX1-NEXT: vdivu.vv v8, v8, v10
+; LMULMAX1-NEXT: vse8.v v8, (a0)
+; LMULMAX1-NEXT: vse8.v v9, (a1)
+; LMULMAX1-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = udiv <32 x i8> %a, <i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9>
store <32 x i8> %b, ptr %x
@@ -5567,45 +5290,25 @@ define void @mulhs_v32i8(ptr %x) {
}
define void @mulhs_v16i16(ptr %x) {
-; LMULMAX2-RV32-LABEL: mulhs_v16i16:
-; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX2-RV32-NEXT: lui a1, 5
-; LMULMAX2-RV32-NEXT: addi a1, a1, -1755
-; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1
-; LMULMAX2-RV32-NEXT: lui a1, 7
-; LMULMAX2-RV32-NEXT: addi a1, a1, -1687
-; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1
-; LMULMAX2-RV32-NEXT: lui a1, 1048571
-; LMULMAX2-RV32-NEXT: addi a1, a1, 1755
-; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a1, v0
-; LMULMAX2-RV32-NEXT: vmulh.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vsra.vi v8, v8, 1
-; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 15
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT: ret
-;
-; LMULMAX2-RV64-LABEL: mulhs_v16i16:
-; LMULMAX2-RV64: # %bb.0:
-; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX2-RV64-NEXT: lui a1, 5
-; LMULMAX2-RV64-NEXT: addiw a1, a1, -1755
-; LMULMAX2-RV64-NEXT: vmv.v.x v10, a1
-; LMULMAX2-RV64-NEXT: lui a1, 7
-; LMULMAX2-RV64-NEXT: addiw a1, a1, -1687
-; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1
-; LMULMAX2-RV64-NEXT: lui a1, 1048571
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 1755
-; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a1, v0
-; LMULMAX2-RV64-NEXT: vmulh.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vsra.vi v8, v8, 1
-; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 15
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vse16.v v8, (a0)
-; LMULMAX2-RV64-NEXT: ret
+; LMULMAX2-LABEL: mulhs_v16i16:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; LMULMAX2-NEXT: vle16.v v8, (a0)
+; LMULMAX2-NEXT: lui a1, 5
+; LMULMAX2-NEXT: addi a1, a1, -1755
+; LMULMAX2-NEXT: vmv.v.x v10, a1
+; LMULMAX2-NEXT: lui a1, 7
+; LMULMAX2-NEXT: addi a1, a1, -1687
+; LMULMAX2-NEXT: vmv.s.x v0, a1
+; LMULMAX2-NEXT: lui a1, 1048571
+; LMULMAX2-NEXT: addi a1, a1, 1755
+; LMULMAX2-NEXT: vmerge.vxm v10, v10, a1, v0
+; LMULMAX2-NEXT: vmulh.vv v8, v8, v10
+; LMULMAX2-NEXT: vsra.vi v8, v8, 1
+; LMULMAX2-NEXT: vsrl.vi v10, v8, 15
+; LMULMAX2-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-NEXT: vse16.v v8, (a0)
+; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: mulhs_v16i16:
; LMULMAX1: # %bb.0:
@@ -5764,14 +5467,14 @@ define void @mulhs_v4i64(ptr %x) {
; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a1, v0
; LMULMAX2-RV64-NEXT: vmulh.vv v10, v8, v10
; LMULMAX2-RV64-NEXT: lui a1, 1044496
-; LMULMAX2-RV64-NEXT: addiw a1, a1, -256
+; LMULMAX2-RV64-NEXT: addi a1, a1, -256
; LMULMAX2-RV64-NEXT: vmv.s.x v12, a1
; LMULMAX2-RV64-NEXT: vsext.vf8 v14, v12
; LMULMAX2-RV64-NEXT: vmadd.vv v14, v8, v10
; LMULMAX2-RV64-NEXT: li a1, 63
; LMULMAX2-RV64-NEXT: vsrl.vx v8, v14, a1
; LMULMAX2-RV64-NEXT: lui a1, 4096
-; LMULMAX2-RV64-NEXT: addiw a1, a1, 256
+; LMULMAX2-RV64-NEXT: addi a1, a1, 256
; LMULMAX2-RV64-NEXT: vmv.s.x v10, a1
; LMULMAX2-RV64-NEXT: vsext.vf8 v12, v10
; LMULMAX2-RV64-NEXT: vsra.vv v10, v14, v12
@@ -8356,33 +8059,19 @@ define void @mulhu_vx_v16i8(ptr %x) {
}
define void @mulhu_vx_v8i16(ptr %x) {
-; RV32-LABEL: mulhu_vx_v8i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: lui a1, 2
-; RV32-NEXT: addi a1, a1, 1171
-; RV32-NEXT: vmulhu.vx v9, v8, a1
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v8, v8, 1
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: mulhu_vx_v8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: lui a1, 2
-; RV64-NEXT: addiw a1, a1, 1171
-; RV64-NEXT: vmulhu.vx v9, v8, a1
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v8, v8, 1
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vse16.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: mulhu_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: lui a1, 2
+; CHECK-NEXT: addi a1, a1, 1171
+; CHECK-NEXT: vmulhu.vx v9, v8, a1
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v8, v8, 1
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
store <8 x i16> %b, ptr %x
@@ -8390,27 +8079,16 @@ define void @mulhu_vx_v8i16(ptr %x) {
}
define void @mulhu_vx_v4i32(ptr %x) {
-; RV32-LABEL: mulhu_vx_v4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: lui a1, 838861
-; RV32-NEXT: addi a1, a1, -819
-; RV32-NEXT: vmulhu.vx v8, v8, a1
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: mulhu_vx_v4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: lui a1, 838861
-; RV64-NEXT: addiw a1, a1, -819
-; RV64-NEXT: vmulhu.vx v8, v8, a1
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: mulhu_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: lui a1, 838861
+; CHECK-NEXT: addi a1, a1, -819
+; CHECK-NEXT: vmulhu.vx v8, v8, a1
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = udiv <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
store <4 x i32> %b, ptr %x
@@ -8472,31 +8150,18 @@ define void @mulhs_vx_v16i8(ptr %x) {
}
define void @mulhs_vx_v8i16(ptr %x) {
-; RV32-LABEL: mulhs_vx_v8i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: lui a1, 5
-; RV32-NEXT: addi a1, a1, -1755
-; RV32-NEXT: vmulh.vx v8, v8, a1
-; RV32-NEXT: vsra.vi v8, v8, 1
-; RV32-NEXT: vsrl.vi v9, v8, 15
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: mulhs_vx_v8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: lui a1, 5
-; RV64-NEXT: addiw a1, a1, -1755
-; RV64-NEXT: vmulh.vx v8, v8, a1
-; RV64-NEXT: vsra.vi v8, v8, 1
-; RV64-NEXT: vsrl.vi v9, v8, 15
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: vse16.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: mulhs_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: lui a1, 5
+; CHECK-NEXT: addi a1, a1, -1755
+; CHECK-NEXT: vmulh.vx v8, v8, a1
+; CHECK-NEXT: vsra.vi v8, v8, 1
+; CHECK-NEXT: vsrl.vi v9, v8, 15
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = sdiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
store <8 x i16> %b, ptr %x
@@ -8522,7 +8187,7 @@ define void @mulhs_vx_v4i32(ptr %x) {
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
; RV64-NEXT: lui a1, 629146
-; RV64-NEXT: addiw a1, a1, -1639
+; RV64-NEXT: addi a1, a1, -1639
; RV64-NEXT: vmulh.vx v8, v8, a1
; RV64-NEXT: vsra.vi v8, v8, 1
; RV64-NEXT: vsrl.vi v9, v8, 31
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index b3099f6b57056ed..eeb8e517d01d2d8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -845,7 +845,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
; RV64-NEXT: lui a1, 16
-; RV64-NEXT: addiw a1, a1, 7
+; RV64-NEXT: addi a1, a1, 7
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmv.v.x v12, a1
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
@@ -917,7 +917,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
; RV64-NEXT: vmv.v.v v4, v8
; RV64-NEXT: lui a1, 112
-; RV64-NEXT: addiw a1, a1, 1
+; RV64-NEXT: addi a1, a1, 1
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmv.v.x v12, a1
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
index eae615db137eaac..d23c494ba37a061 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -426,21 +426,21 @@ define <10 x i1> @buildvec_mask_v10i1() {
}
define <16 x i1> @buildvec_mask_v16i1() {
-; CHECK-RV32-LABEL: buildvec_mask_v16i1:
-; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lui a0, 11
-; CHECK-RV32-NEXT: addi a0, a0, 1718
-; CHECK-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-RV32-NEXT: vmv.s.x v0, a0
-; CHECK-RV32-NEXT: ret
-;
-; CHECK-RV64-LABEL: buildvec_mask_v16i1:
-; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lui a0, 11
-; CHECK-RV64-NEXT: addiw a0, a0, 1718
-; CHECK-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-RV64-NEXT: vmv.s.x v0, a0
-; CHECK-RV64-NEXT: ret
+; CHECK-LABEL: buildvec_mask_v16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 11
+; CHECK-NEXT: addi a0, a0, 1718
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: ret
+;
+; ZVE32F-LABEL: buildvec_mask_v16i1:
+; ZVE32F: # %bb.0:
+; ZVE32F-NEXT: lui a0, 11
+; ZVE32F-NEXT: addi a0, a0, 1718
+; ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; ZVE32F-NEXT: vmv.s.x v0, a0
+; ZVE32F-NEXT: ret
ret <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1>
}
@@ -478,7 +478,7 @@ define <32 x i1> @buildvec_mask_v32i1() {
; RV64-LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV64-LMULMAX1-NEXT: vmv.s.x v0, a0
; RV64-LMULMAX1-NEXT: lui a0, 11
-; RV64-LMULMAX1-NEXT: addiw a0, a0, 1718
+; RV64-LMULMAX1-NEXT: addi a0, a0, 1718
; RV64-LMULMAX1-NEXT: vmv.s.x v8, a0
; RV64-LMULMAX1-NEXT: ret
;
@@ -493,7 +493,7 @@ define <32 x i1> @buildvec_mask_v32i1() {
; RV64-LMULMAX2-LABEL: buildvec_mask_v32i1:
; RV64-LMULMAX2: # %bb.0:
; RV64-LMULMAX2-NEXT: lui a0, 748384
-; RV64-LMULMAX2-NEXT: addiw a0, a0, 1776
+; RV64-LMULMAX2-NEXT: addi a0, a0, 1776
; RV64-LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV64-LMULMAX2-NEXT: vmv.s.x v0, a0
; RV64-LMULMAX2-NEXT: ret
@@ -509,7 +509,7 @@ define <32 x i1> @buildvec_mask_v32i1() {
; RV64-LMULMAX4-LABEL: buildvec_mask_v32i1:
; RV64-LMULMAX4: # %bb.0:
; RV64-LMULMAX4-NEXT: lui a0, 748384
-; RV64-LMULMAX4-NEXT: addiw a0, a0, 1776
+; RV64-LMULMAX4-NEXT: addi a0, a0, 1776
; RV64-LMULMAX4-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV64-LMULMAX4-NEXT: vmv.s.x v0, a0
; RV64-LMULMAX4-NEXT: ret
@@ -525,10 +525,18 @@ define <32 x i1> @buildvec_mask_v32i1() {
; RV64-LMULMAX8-LABEL: buildvec_mask_v32i1:
; RV64-LMULMAX8: # %bb.0:
; RV64-LMULMAX8-NEXT: lui a0, 748384
-; RV64-LMULMAX8-NEXT: addiw a0, a0, 1776
+; RV64-LMULMAX8-NEXT: addi a0, a0, 1776
; RV64-LMULMAX8-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV64-LMULMAX8-NEXT: vmv.s.x v0, a0
; RV64-LMULMAX8-NEXT: ret
+;
+; ZVE32F-LABEL: buildvec_mask_v32i1:
+; ZVE32F: # %bb.0:
+; ZVE32F-NEXT: lui a0, 748384
+; ZVE32F-NEXT: addi a0, a0, 1776
+; ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; ZVE32F-NEXT: vmv.s.x v0, a0
+; ZVE32F-NEXT: ret
ret <32 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1>
}
@@ -553,10 +561,10 @@ define <64 x i1> @buildvec_mask_v64i1() {
; RV64-LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV64-LMULMAX1-NEXT: vmv.s.x v0, a0
; RV64-LMULMAX1-NEXT: lui a0, 4
-; RV64-LMULMAX1-NEXT: addiw a0, a0, -1793
+; RV64-LMULMAX1-NEXT: addi a0, a0, -1793
; RV64-LMULMAX1-NEXT: vmv.s.x v9, a0
; RV64-LMULMAX1-NEXT: lui a0, 11
-; RV64-LMULMAX1-NEXT: addiw a0, a0, 1718
+; RV64-LMULMAX1-NEXT: addi a0, a0, 1718
; RV64-LMULMAX1-NEXT: vmv.s.x v8, a0
; RV64-LMULMAX1-NEXT: vmv1r.v v10, v8
; RV64-LMULMAX1-NEXT: ret
@@ -575,11 +583,11 @@ define <64 x i1> @buildvec_mask_v64i1() {
; RV64-LMULMAX2-LABEL: buildvec_mask_v64i1:
; RV64-LMULMAX2: # %bb.0:
; RV64-LMULMAX2-NEXT: lui a0, 748384
-; RV64-LMULMAX2-NEXT: addiw a0, a0, 1776
+; RV64-LMULMAX2-NEXT: addi a0, a0, 1776
; RV64-LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV64-LMULMAX2-NEXT: vmv.s.x v0, a0
; RV64-LMULMAX2-NEXT: lui a0, 748388
-; RV64-LMULMAX2-NEXT: addiw a0, a0, -1793
+; RV64-LMULMAX2-NEXT: addi a0, a0, -1793
; RV64-LMULMAX2-NEXT: vmv.s.x v8, a0
; RV64-LMULMAX2-NEXT: ret
;
@@ -622,6 +630,18 @@ define <64 x i1> @buildvec_mask_v64i1() {
; RV64-LMULMAX8-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-LMULMAX8-NEXT: vle64.v v0, (a0)
; RV64-LMULMAX8-NEXT: ret
+;
+; ZVE32F-LABEL: buildvec_mask_v64i1:
+; ZVE32F: # %bb.0:
+; ZVE32F-NEXT: lui a0, 748388
+; ZVE32F-NEXT: addi a0, a0, -1793
+; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; ZVE32F-NEXT: vmv.v.x v0, a0
+; ZVE32F-NEXT: lui a0, 748384
+; ZVE32F-NEXT: addi a0, a0, 1776
+; ZVE32F-NEXT: vsetvli zero, zero, e32, m1, tu, ma
+; ZVE32F-NEXT: vmv.s.x v0, a0
+; ZVE32F-NEXT: ret
ret <64 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1>
}
@@ -654,16 +674,16 @@ define <128 x i1> @buildvec_mask_v128i1() {
; RV64-LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV64-LMULMAX1-NEXT: vmv.s.x v0, a0
; RV64-LMULMAX1-NEXT: lui a0, 11
-; RV64-LMULMAX1-NEXT: addiw a0, a0, 1718
+; RV64-LMULMAX1-NEXT: addi a0, a0, 1718
; RV64-LMULMAX1-NEXT: vmv.s.x v8, a0
; RV64-LMULMAX1-NEXT: lui a0, 8
-; RV64-LMULMAX1-NEXT: addiw a0, a0, 1718
+; RV64-LMULMAX1-NEXT: addi a0, a0, 1718
; RV64-LMULMAX1-NEXT: vmv.s.x v12, a0
; RV64-LMULMAX1-NEXT: lui a0, 4
-; RV64-LMULMAX1-NEXT: addiw a0, a0, -1793
+; RV64-LMULMAX1-NEXT: addi a0, a0, -1793
; RV64-LMULMAX1-NEXT: vmv.s.x v9, a0
; RV64-LMULMAX1-NEXT: lui a0, 14
-; RV64-LMULMAX1-NEXT: addiw a0, a0, 1722
+; RV64-LMULMAX1-NEXT: addi a0, a0, 1722
; RV64-LMULMAX1-NEXT: vmv.s.x v14, a0
; RV64-LMULMAX1-NEXT: vmv1r.v v10, v8
; RV64-LMULMAX1-NEXT: vmv1r.v v11, v0
@@ -690,17 +710,17 @@ define <128 x i1> @buildvec_mask_v128i1() {
; RV64-LMULMAX2-LABEL: buildvec_mask_v128i1:
; RV64-LMULMAX2: # %bb.0:
; RV64-LMULMAX2-NEXT: lui a0, 748384
-; RV64-LMULMAX2-NEXT: addiw a0, a0, 1776
+; RV64-LMULMAX2-NEXT: addi a0, a0, 1776
; RV64-LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV64-LMULMAX2-NEXT: vmv.s.x v0, a0
; RV64-LMULMAX2-NEXT: lui a0, 748388
-; RV64-LMULMAX2-NEXT: addiw a0, a0, -1793
+; RV64-LMULMAX2-NEXT: addi a0, a0, -1793
; RV64-LMULMAX2-NEXT: vmv.s.x v8, a0
; RV64-LMULMAX2-NEXT: lui a0, 551776
-; RV64-LMULMAX2-NEXT: addiw a0, a0, 1776
+; RV64-LMULMAX2-NEXT: addi a0, a0, 1776
; RV64-LMULMAX2-NEXT: vmv.s.x v9, a0
; RV64-LMULMAX2-NEXT: lui a0, 945060
-; RV64-LMULMAX2-NEXT: addiw a0, a0, -1793
+; RV64-LMULMAX2-NEXT: addi a0, a0, -1793
; RV64-LMULMAX2-NEXT: vmv.s.x v10, a0
; RV64-LMULMAX2-NEXT: ret
;
@@ -794,16 +814,16 @@ define <128 x i1> @buildvec_mask_optsize_v128i1() optsize {
; RV64-LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV64-LMULMAX1-NEXT: vmv.s.x v0, a0
; RV64-LMULMAX1-NEXT: lui a0, 11
-; RV64-LMULMAX1-NEXT: addiw a0, a0, 1718
+; RV64-LMULMAX1-NEXT: addi a0, a0, 1718
; RV64-LMULMAX1-NEXT: vmv.s.x v8, a0
; RV64-LMULMAX1-NEXT: lui a0, 8
-; RV64-LMULMAX1-NEXT: addiw a0, a0, 1718
+; RV64-LMULMAX1-NEXT: addi a0, a0, 1718
; RV64-LMULMAX1-NEXT: vmv.s.x v12, a0
; RV64-LMULMAX1-NEXT: lui a0, 4
-; RV64-LMULMAX1-NEXT: addiw a0, a0, -1793
+; RV64-LMULMAX1-NEXT: addi a0, a0, -1793
; RV64-LMULMAX1-NEXT: vmv.s.x v9, a0
; RV64-LMULMAX1-NEXT: lui a0, 14
-; RV64-LMULMAX1-NEXT: addiw a0, a0, 1722
+; RV64-LMULMAX1-NEXT: addi a0, a0, 1722
; RV64-LMULMAX1-NEXT: vmv.s.x v14, a0
; RV64-LMULMAX1-NEXT: vmv1r.v v10, v8
; RV64-LMULMAX1-NEXT: vmv1r.v v11, v0
@@ -830,17 +850,17 @@ define <128 x i1> @buildvec_mask_optsize_v128i1() optsize {
; RV64-LMULMAX2-LABEL: buildvec_mask_optsize_v128i1:
; RV64-LMULMAX2: # %bb.0:
; RV64-LMULMAX2-NEXT: lui a0, 748384
-; RV64-LMULMAX2-NEXT: addiw a0, a0, 1776
+; RV64-LMULMAX2-NEXT: addi a0, a0, 1776
; RV64-LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV64-LMULMAX2-NEXT: vmv.s.x v0, a0
; RV64-LMULMAX2-NEXT: lui a0, 748388
-; RV64-LMULMAX2-NEXT: addiw a0, a0, -1793
+; RV64-LMULMAX2-NEXT: addi a0, a0, -1793
; RV64-LMULMAX2-NEXT: vmv.s.x v8, a0
; RV64-LMULMAX2-NEXT: lui a0, 551776
-; RV64-LMULMAX2-NEXT: addiw a0, a0, 1776
+; RV64-LMULMAX2-NEXT: addi a0, a0, 1776
; RV64-LMULMAX2-NEXT: vmv.s.x v9, a0
; RV64-LMULMAX2-NEXT: lui a0, 945060
-; RV64-LMULMAX2-NEXT: addiw a0, a0, -1793
+; RV64-LMULMAX2-NEXT: addi a0, a0, -1793
; RV64-LMULMAX2-NEXT: vmv.s.x v10, a0
; RV64-LMULMAX2-NEXT: ret
;
@@ -895,3 +915,6 @@ define <128 x i1> @buildvec_mask_optsize_v128i1() optsize {
; ZVE32F-NEXT: ret
ret <128 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1>
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-RV32: {{.*}}
+; CHECK-RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 480e5c2f8f2b8b6..f8a8ffd3a07970f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -14507,7 +14507,7 @@ define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) {
; RV64V-LABEL: mgather_gather_4xSEW_partial_align:
; RV64V: # %bb.0:
; RV64V-NEXT: lui a1, 82176
-; RV64V-NEXT: addiw a1, a1, 1024
+; RV64V-NEXT: addi a1, a1, 1024
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64V-NEXT: vmv.s.x v9, a1
; RV64V-NEXT: vluxei8.v v8, (a0), v9
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
index 8c96392f08a5dbe..dd9a1118ab821d4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
@@ -649,39 +649,22 @@ define i32 @reduce_smin_16xi32_prefix2(ptr %p) {
}
define i32 @reduce_smin_16xi32_prefix5(ptr %p) {
-; RV32-LABEL: reduce_smin_16xi32_prefix5:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a1, 524288
-; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: vmv.s.x v10, a1
-; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; RV32-NEXT: vslideup.vi v8, v10, 5
-; RV32-NEXT: vsetivli zero, 7, e32, m2, tu, ma
-; RV32-NEXT: vslideup.vi v8, v10, 6
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslideup.vi v8, v10, 7
-; RV32-NEXT: vredmin.vs v8, v8, v8
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reduce_smin_16xi32_prefix5:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a1, 524288
-; RV64-NEXT: addiw a1, a1, -1
-; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vmv.s.x v10, a1
-; RV64-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; RV64-NEXT: vslideup.vi v8, v10, 5
-; RV64-NEXT: vsetivli zero, 7, e32, m2, tu, ma
-; RV64-NEXT: vslideup.vi v8, v10, 6
-; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV64-NEXT: vslideup.vi v8, v10, 7
-; RV64-NEXT: vredmin.vs v8, v8, v8
-; RV64-NEXT: vmv.x.s a0, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: reduce_smin_16xi32_prefix5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 524288
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vmv.s.x v10, a1
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 5
+; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 6
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 7
+; CHECK-NEXT: vredmin.vs v8, v8, v8
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
%e0 = extractelement <16 x i32> %v, i32 0
%e1 = extractelement <16 x i32> %v, i32 1
@@ -781,3 +764,6 @@ define i32 @reduce_umin_16xi32_prefix5(ptr %p) {
%umin3 = call i32 @llvm.umin.i32(i32 %umin2, i32 %e4)
ret i32 %umin3
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll
index 1bf832a229b218f..a1d2b5106d5a967 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll
@@ -37,69 +37,39 @@ define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
}
define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
-; RV32-LABEL: trn1.v16i8:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vid.v v11
-; RV32-NEXT: vrgather.vv v10, v8, v11
-; RV32-NEXT: vadd.vi v8, v11, -1
-; RV32-NEXT: lui a0, 11
-; RV32-NEXT: addi a0, a0, -1366
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV32-NEXT: vrgather.vv v10, v9, v8, v0.t
-; RV32-NEXT: vmv.v.v v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: trn1.v16i8:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vid.v v11
-; RV64-NEXT: vrgather.vv v10, v8, v11
-; RV64-NEXT: vadd.vi v8, v11, -1
-; RV64-NEXT: lui a0, 11
-; RV64-NEXT: addiw a0, a0, -1366
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vmv.s.x v0, a0
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV64-NEXT: vrgather.vv v10, v9, v8, v0.t
-; RV64-NEXT: vmv.v.v v8, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: trn1.v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vid.v v11
+; CHECK-NEXT: vrgather.vv v10, v8, v11
+; CHECK-NEXT: vadd.vi v8, v11, -1
+; CHECK-NEXT: lui a0, 11
+; CHECK-NEXT: addi a0, a0, -1366
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
%tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
ret <16 x i8> %tmp0
}
define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
-; RV32-LABEL: trn2.v16i8:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vid.v v11
-; RV32-NEXT: vadd.vi v12, v11, 1
-; RV32-NEXT: vrgather.vv v10, v8, v12
-; RV32-NEXT: lui a0, 11
-; RV32-NEXT: addi a0, a0, -1366
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV32-NEXT: vrgather.vv v10, v9, v11, v0.t
-; RV32-NEXT: vmv.v.v v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: trn2.v16i8:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vid.v v11
-; RV64-NEXT: vadd.vi v12, v11, 1
-; RV64-NEXT: vrgather.vv v10, v8, v12
-; RV64-NEXT: lui a0, 11
-; RV64-NEXT: addiw a0, a0, -1366
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vmv.s.x v0, a0
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV64-NEXT: vrgather.vv v10, v9, v11, v0.t
-; RV64-NEXT: vmv.v.v v8, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: trn2.v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vid.v v11
+; CHECK-NEXT: vadd.vi v12, v11, 1
+; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: lui a0, 11
+; CHECK-NEXT: addi a0, a0, -1366
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
%tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
ret <16 x i8> %tmp0
}
@@ -379,3 +349,6 @@ define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) {
%tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
ret <8 x half> %tmp0
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
index 49daa4413035cf4..fd117f9e8ea0740 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
@@ -352,27 +352,16 @@ define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert2(<4 x i8> %v, i8 %b) {
}
define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert3(<4 x i8> %v, i8 %b) {
-; RV32-LABEL: vslide1up_4xi8_neg_incorrect_insert3:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 8208
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v10, a0
-; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; RV32-NEXT: vrgather.vv v9, v8, v10
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vslide1up_4xi8_neg_incorrect_insert3:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 8208
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; RV64-NEXT: vrgather.vv v9, v8, v10
-; RV64-NEXT: vmv1r.v v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vslide1up_4xi8_neg_incorrect_insert3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8208
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
%v2 = shufflevector <4 x i8> poison, <4 x i8> %v, <4 x i32> <i32 5, i32 4, i32 5, i32 6>
ret <4 x i8> %v2
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
index fc0564827036763..32d26827f989e04 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
@@ -118,19 +118,12 @@ define void @store_constant_v2i8(ptr %p) {
}
define void @store_constant_v2i16(ptr %p) {
-; RV32-LABEL: store_constant_v2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a1, 96
-; RV32-NEXT: addi a1, a1, 3
-; RV32-NEXT: sw a1, 0(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: store_constant_v2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a1, 96
-; RV64-NEXT: addiw a1, a1, 3
-; RV64-NEXT: sw a1, 0(a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: store_constant_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 96
+; CHECK-NEXT: addi a1, a1, 3
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
store <2 x i16> <i16 3, i16 6>, ptr %p
ret void
}
@@ -150,87 +143,52 @@ define void @store_constant_v2i32(ptr %p) {
}
define void @store_constant_v4i8(ptr %p) {
-; RV32-LABEL: store_constant_v4i8:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a1, 4176
-; RV32-NEXT: addi a1, a1, 1539
-; RV32-NEXT: sw a1, 0(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: store_constant_v4i8:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a1, 4176
-; RV64-NEXT: addiw a1, a1, 1539
-; RV64-NEXT: sw a1, 0(a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: store_constant_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 4176
+; CHECK-NEXT: addi a1, a1, 1539
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
store <4 x i8> <i8 3, i8 6, i8 5, i8 1>, ptr %p
ret void
}
define void @store_constant_v4i16(ptr %p) {
-; RV32-LABEL: store_constant_v4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a1, 4176
-; RV32-NEXT: addi a1, a1, 1539
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v8, a1
-; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT: vsext.vf2 v9, v8
-; RV32-NEXT: vse16.v v9, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: store_constant_v4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a1, 4176
-; RV64-NEXT: addiw a1, a1, 1539
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v8, a1
-; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT: vsext.vf2 v9, v8
-; RV64-NEXT: vse16.v v9, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: store_constant_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 4176
+; CHECK-NEXT: addi a1, a1, 1539
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a1
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsext.vf2 v9, v8
+; CHECK-NEXT: vse16.v v9, (a0)
+; CHECK-NEXT: ret
store <4 x i16> <i16 3, i16 6, i16 5, i16 1>, ptr %p
ret void
}
define void @store_constant_v4i32(ptr %p) {
-; RV32-LABEL: store_constant_v4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a1, 4176
-; RV32-NEXT: addi a1, a1, 1539
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v8, a1
-; RV32-NEXT: vsext.vf4 v9, v8
-; RV32-NEXT: vse32.v v9, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: store_constant_v4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a1, 4176
-; RV64-NEXT: addiw a1, a1, 1539
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v8, a1
-; RV64-NEXT: vsext.vf4 v9, v8
-; RV64-NEXT: vse32.v v9, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: store_constant_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 4176
+; CHECK-NEXT: addi a1, a1, 1539
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a1
+; CHECK-NEXT: vsext.vf4 v9, v8
+; CHECK-NEXT: vse32.v v9, (a0)
+; CHECK-NEXT: ret
store <4 x i32> <i32 3, i32 6, i32 5, i32 1>, ptr %p
ret void
}
define void @store_id_v4i8(ptr %p) {
-; RV32-LABEL: store_id_v4i8:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a1, 12320
-; RV32-NEXT: addi a1, a1, 256
-; RV32-NEXT: sw a1, 0(a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: store_id_v4i8:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a1, 12320
-; RV64-NEXT: addiw a1, a1, 256
-; RV64-NEXT: sw a1, 0(a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: store_id_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 12320
+; CHECK-NEXT: addi a1, a1, 256
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
store <4 x i8> <i8 0, i8 1, i8 2, i8 3>, ptr %p
ret void
}
@@ -279,3 +237,6 @@ define void @store_constant_v2i8_volatile(ptr %p) {
store volatile <2 x i8> <i8 1, i8 1>, ptr %p
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
index 8ac3b7d02e338a8..ef970ad63ae77a8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
@@ -56,7 +56,7 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado
; V: # %bb.0: # %entry
; V-NEXT: li a2, 1024
; V-NEXT: lui a3, 983765
-; V-NEXT: addiw a3, a3, 873
+; V-NEXT: addi a3, a3, 873
; V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; V-NEXT: vmv.s.x v0, a3
; V-NEXT: li a3, 32
@@ -80,7 +80,7 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado
; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: li a2, 1024
; ZVE32F-NEXT: lui a3, 983765
-; ZVE32F-NEXT: addiw a3, a3, 873
+; ZVE32F-NEXT: addi a3, a3, 873
; ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; ZVE32F-NEXT: vmv.s.x v0, a3
; ZVE32F-NEXT: li a3, 32
@@ -331,7 +331,7 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read
; V-NEXT: li a2, 1024
; V-NEXT: li a3, 32
; V-NEXT: lui a4, 983765
-; V-NEXT: addiw a4, a4, 873
+; V-NEXT: addi a4, a4, 873
; V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; V-NEXT: vmv.s.x v0, a4
; V-NEXT: li a4, 5
@@ -355,7 +355,7 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read
; ZVE32F-NEXT: li a2, 1024
; ZVE32F-NEXT: li a3, 32
; ZVE32F-NEXT: lui a4, 983765
-; ZVE32F-NEXT: addiw a4, a4, 873
+; ZVE32F-NEXT: addi a4, a4, 873
; ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; ZVE32F-NEXT: vmv.s.x v0, a4
; ZVE32F-NEXT: li a4, 5
@@ -838,7 +838,7 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt
; CHECK-NEXT: # %bb.4: # %bb30
; CHECK-NEXT: beq a4, a5, .LBB13_7
; CHECK-NEXT: .LBB13_5: # %bb32
-; CHECK-NEXT: addiw a2, a3, -1024
+; CHECK-NEXT: addi a2, a3, -1024
; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: slli a4, a3, 2
; CHECK-NEXT: add a1, a1, a3
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll b/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll
index db2361dd3e58693..ca833451233becb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll
@@ -16,7 +16,7 @@ define i32 @test(i32 %call.i) {
; CHECK-V-NEXT: vslide1down.vx v8, v8, a0
; CHECK-V-NEXT: lui a0, 524288
; CHECK-V-NEXT: vslide1down.vx v8, v8, a0
-; CHECK-V-NEXT: addiw a0, a0, 2
+; CHECK-V-NEXT: addi a0, a0, 2
; CHECK-V-NEXT: vmslt.vx v0, v8, a0
; CHECK-V-NEXT: vmv.v.i v8, 0
; CHECK-V-NEXT: vmerge.vim v8, v8, 1, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index 8e47dd72ae21814..6cfa504b501bacb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -911,7 +911,7 @@ define <2 x i16> @stest_f64i16(<2 x double> %x) {
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8
; CHECK-V-NEXT: lui a0, 8
-; CHECK-V-NEXT: addiw a0, a0, -1
+; CHECK-V-NEXT: addi a0, a0, -1
; CHECK-V-NEXT: vmin.vx v8, v9, a0
; CHECK-V-NEXT: lui a0, 1048568
; CHECK-V-NEXT: vmax.vx v8, v8, a0
@@ -952,7 +952,7 @@ define <2 x i16> @utest_f64i16(<2 x double> %x) {
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.xu.f.w v9, v8
; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addiw a0, a0, -1
+; CHECK-V-NEXT: addi a0, a0, -1
; CHECK-V-NEXT: vminu.vx v8, v9, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
@@ -993,7 +993,7 @@ define <2 x i16> @ustest_f64i16(<2 x double> %x) {
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8
; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addiw a0, a0, -1
+; CHECK-V-NEXT: addi a0, a0, -1
; CHECK-V-NEXT: vmin.vx v8, v9, a0
; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
@@ -1073,7 +1073,7 @@ define <4 x i16> @stest_f32i16(<4 x float> %x) {
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
; CHECK-V-NEXT: lui a0, 8
-; CHECK-V-NEXT: addiw a0, a0, -1
+; CHECK-V-NEXT: addi a0, a0, -1
; CHECK-V-NEXT: vmin.vx v8, v8, a0
; CHECK-V-NEXT: lui a0, 1048568
; CHECK-V-NEXT: vmax.vx v8, v8, a0
@@ -1132,7 +1132,7 @@ define <4 x i16> @utest_f32i16(<4 x float> %x) {
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.xu.f.v v8, v8
; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addiw a0, a0, -1
+; CHECK-V-NEXT: addi a0, a0, -1
; CHECK-V-NEXT: vminu.vx v8, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
@@ -1199,7 +1199,7 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) {
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addiw a0, a0, -1
+; CHECK-V-NEXT: addi a0, a0, -1
; CHECK-V-NEXT: vmin.vx v8, v8, a0
; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
@@ -1511,7 +1511,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 7
; CHECK-V-NEXT: lui a0, 8
-; CHECK-V-NEXT: addiw a0, a0, -1
+; CHECK-V-NEXT: addi a0, a0, -1
; CHECK-V-NEXT: vmin.vx v8, v10, a0
; CHECK-V-NEXT: lui a0, 1048568
; CHECK-V-NEXT: vmax.vx v10, v8, a0
@@ -1794,7 +1794,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 7
; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addiw a0, a0, -1
+; CHECK-V-NEXT: addi a0, a0, -1
; CHECK-V-NEXT: vminu.vx v10, v10, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
@@ -2097,7 +2097,7 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 7
; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addiw a0, a0, -1
+; CHECK-V-NEXT: addi a0, a0, -1
; CHECK-V-NEXT: vmin.vx v8, v10, a0
; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
@@ -4227,7 +4227,7 @@ define <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8
; CHECK-V-NEXT: lui a0, 8
-; CHECK-V-NEXT: addiw a0, a0, -1
+; CHECK-V-NEXT: addi a0, a0, -1
; CHECK-V-NEXT: vmin.vx v8, v9, a0
; CHECK-V-NEXT: lui a0, 1048568
; CHECK-V-NEXT: vmax.vx v8, v8, a0
@@ -4266,7 +4266,7 @@ define <2 x i16> @utest_f64i16_mm(<2 x double> %x) {
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.xu.f.w v9, v8
; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addiw a0, a0, -1
+; CHECK-V-NEXT: addi a0, a0, -1
; CHECK-V-NEXT: vminu.vx v8, v9, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
@@ -4306,7 +4306,7 @@ define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8
; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addiw a0, a0, -1
+; CHECK-V-NEXT: addi a0, a0, -1
; CHECK-V-NEXT: vmin.vx v8, v9, a0
; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
@@ -4384,7 +4384,7 @@ define <4 x i16> @stest_f32i16_mm(<4 x float> %x) {
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
; CHECK-V-NEXT: lui a0, 8
-; CHECK-V-NEXT: addiw a0, a0, -1
+; CHECK-V-NEXT: addi a0, a0, -1
; CHECK-V-NEXT: vmin.vx v8, v8, a0
; CHECK-V-NEXT: lui a0, 1048568
; CHECK-V-NEXT: vmax.vx v8, v8, a0
@@ -4441,7 +4441,7 @@ define <4 x i16> @utest_f32i16_mm(<4 x float> %x) {
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.xu.f.v v8, v8
; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addiw a0, a0, -1
+; CHECK-V-NEXT: addi a0, a0, -1
; CHECK-V-NEXT: vminu.vx v8, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
@@ -4507,7 +4507,7 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addiw a0, a0, -1
+; CHECK-V-NEXT: addi a0, a0, -1
; CHECK-V-NEXT: vmin.vx v8, v8, a0
; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
@@ -4817,7 +4817,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 7
; CHECK-V-NEXT: lui a0, 8
-; CHECK-V-NEXT: addiw a0, a0, -1
+; CHECK-V-NEXT: addi a0, a0, -1
; CHECK-V-NEXT: vmin.vx v8, v10, a0
; CHECK-V-NEXT: lui a0, 1048568
; CHECK-V-NEXT: vmax.vx v10, v8, a0
@@ -5096,7 +5096,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 7
; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addiw a0, a0, -1
+; CHECK-V-NEXT: addi a0, a0, -1
; CHECK-V-NEXT: vminu.vx v10, v10, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
@@ -5398,7 +5398,7 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 7
; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addiw a0, a0, -1
+; CHECK-V-NEXT: addi a0, a0, -1
; CHECK-V-NEXT: vmin.vx v8, v10, a0
; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
index 82c9f405c23923d..4e08f401ca4e904 100644
--- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
@@ -65,7 +65,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 2
-; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v9
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0
@@ -82,7 +82,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) {
; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: srli a0, a0, 2
-; RV64-BITS-256-NEXT: addiw a0, a0, -1
+; RV64-BITS-256-NEXT: addi a0, a0, -1
; RV64-BITS-256-NEXT: vid.v v9
; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9
@@ -97,7 +97,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) {
; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: srli a0, a0, 2
-; RV64-BITS-512-NEXT: addiw a0, a0, -1
+; RV64-BITS-512-NEXT: addi a0, a0, -1
; RV64-BITS-512-NEXT: vid.v v9
; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9
@@ -163,7 +163,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 1
-; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v9
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0
@@ -180,7 +180,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) {
; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: srli a0, a0, 1
-; RV64-BITS-256-NEXT: addiw a0, a0, -1
+; RV64-BITS-256-NEXT: addi a0, a0, -1
; RV64-BITS-256-NEXT: vid.v v9
; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9
@@ -195,7 +195,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) {
; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: srli a0, a0, 1
-; RV64-BITS-512-NEXT: addiw a0, a0, -1
+; RV64-BITS-512-NEXT: addi a0, a0, -1
; RV64-BITS-512-NEXT: vid.v v9
; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9
@@ -257,7 +257,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
-; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v10
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0
@@ -273,7 +273,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
; RV64-BITS-256-NEXT: vmv.v.i v8, 0
; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-256-NEXT: csrr a0, vlenb
-; RV64-BITS-256-NEXT: addiw a0, a0, -1
+; RV64-BITS-256-NEXT: addi a0, a0, -1
; RV64-BITS-256-NEXT: vid.v v9
; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9
@@ -287,7 +287,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
; RV64-BITS-512-NEXT: vmv.v.i v8, 0
; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-512-NEXT: csrr a0, vlenb
-; RV64-BITS-512-NEXT: addiw a0, a0, -1
+; RV64-BITS-512-NEXT: addi a0, a0, -1
; RV64-BITS-512-NEXT: vid.v v9
; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9
@@ -353,7 +353,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1
-; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v12
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v12, v12, a0
@@ -370,7 +370,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: slli a0, a0, 1
-; RV64-BITS-256-NEXT: addiw a0, a0, -1
+; RV64-BITS-256-NEXT: addi a0, a0, -1
; RV64-BITS-256-NEXT: vid.v v10
; RV64-BITS-256-NEXT: vrsub.vx v10, v10, a0
; RV64-BITS-256-NEXT: vrgather.vv v12, v8, v10
@@ -385,7 +385,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: slli a0, a0, 1
-; RV64-BITS-512-NEXT: addiw a0, a0, -1
+; RV64-BITS-512-NEXT: addi a0, a0, -1
; RV64-BITS-512-NEXT: vid.v v10
; RV64-BITS-512-NEXT: vrsub.vx v10, v10, a0
; RV64-BITS-512-NEXT: vrgather.vv v12, v8, v10
@@ -451,7 +451,7 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2
-; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v16
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0
@@ -468,7 +468,7 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: slli a0, a0, 2
-; RV64-BITS-256-NEXT: addiw a0, a0, -1
+; RV64-BITS-256-NEXT: addi a0, a0, -1
; RV64-BITS-256-NEXT: vid.v v12
; RV64-BITS-256-NEXT: vrsub.vx v12, v12, a0
; RV64-BITS-256-NEXT: vrgather.vv v16, v8, v12
@@ -483,7 +483,7 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: slli a0, a0, 2
-; RV64-BITS-512-NEXT: addiw a0, a0, -1
+; RV64-BITS-512-NEXT: addi a0, a0, -1
; RV64-BITS-512-NEXT: vid.v v12
; RV64-BITS-512-NEXT: vrsub.vx v12, v12, a0
; RV64-BITS-512-NEXT: vrgather.vv v16, v8, v12
@@ -552,7 +552,7 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2
-; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v8
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0
@@ -574,7 +574,7 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: slli a0, a0, 3
-; RV64-BITS-256-NEXT: addiw a0, a0, -1
+; RV64-BITS-256-NEXT: addi a0, a0, -1
; RV64-BITS-256-NEXT: vid.v v16
; RV64-BITS-256-NEXT: vrsub.vx v16, v16, a0
; RV64-BITS-256-NEXT: vrgather.vv v24, v8, v16
@@ -586,7 +586,7 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: slli a0, a0, 2
-; RV64-BITS-512-NEXT: addiw a0, a0, -1
+; RV64-BITS-512-NEXT: addi a0, a0, -1
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; RV64-BITS-512-NEXT: vid.v v8
; RV64-BITS-512-NEXT: vrsub.vx v8, v8, a0
@@ -650,7 +650,7 @@ define <vscale x 1 x i8> @reverse_nxv1i8(<vscale x 1 x i8> %a) {
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 3
-; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v9
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0
@@ -663,7 +663,7 @@ define <vscale x 1 x i8> @reverse_nxv1i8(<vscale x 1 x i8> %a) {
; RV64-BITS-256: # %bb.0:
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: srli a0, a0, 3
-; RV64-BITS-256-NEXT: addiw a0, a0, -1
+; RV64-BITS-256-NEXT: addi a0, a0, -1
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
; RV64-BITS-256-NEXT: vid.v v9
; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0
@@ -675,7 +675,7 @@ define <vscale x 1 x i8> @reverse_nxv1i8(<vscale x 1 x i8> %a) {
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: srli a0, a0, 3
-; RV64-BITS-512-NEXT: addiw a0, a0, -1
+; RV64-BITS-512-NEXT: addi a0, a0, -1
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
; RV64-BITS-512-NEXT: vid.v v9
; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0
@@ -728,7 +728,7 @@ define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) {
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 2
-; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v9
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0
@@ -741,7 +741,7 @@ define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) {
; RV64-BITS-256: # %bb.0:
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: srli a0, a0, 2
-; RV64-BITS-256-NEXT: addiw a0, a0, -1
+; RV64-BITS-256-NEXT: addi a0, a0, -1
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64-BITS-256-NEXT: vid.v v9
; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0
@@ -753,7 +753,7 @@ define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) {
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: srli a0, a0, 2
-; RV64-BITS-512-NEXT: addiw a0, a0, -1
+; RV64-BITS-512-NEXT: addi a0, a0, -1
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64-BITS-512-NEXT: vid.v v9
; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0
@@ -806,7 +806,7 @@ define <vscale x 4 x i8> @reverse_nxv4i8(<vscale x 4 x i8> %a) {
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 1
-; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v9
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0
@@ -819,7 +819,7 @@ define <vscale x 4 x i8> @reverse_nxv4i8(<vscale x 4 x i8> %a) {
; RV64-BITS-256: # %bb.0:
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: srli a0, a0, 1
-; RV64-BITS-256-NEXT: addiw a0, a0, -1
+; RV64-BITS-256-NEXT: addi a0, a0, -1
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; RV64-BITS-256-NEXT: vid.v v9
; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0
@@ -831,7 +831,7 @@ define <vscale x 4 x i8> @reverse_nxv4i8(<vscale x 4 x i8> %a) {
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: srli a0, a0, 1
-; RV64-BITS-512-NEXT: addiw a0, a0, -1
+; RV64-BITS-512-NEXT: addi a0, a0, -1
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; RV64-BITS-512-NEXT: vid.v v9
; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0
@@ -880,7 +880,7 @@ define <vscale x 8 x i8> @reverse_nxv8i8(<vscale x 8 x i8> %a) {
; RV64-BITS-UNKNOWN-LABEL: reverse_nxv8i8:
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
-; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v10
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0
@@ -892,7 +892,7 @@ define <vscale x 8 x i8> @reverse_nxv8i8(<vscale x 8 x i8> %a) {
; RV64-BITS-256-LABEL: reverse_nxv8i8:
; RV64-BITS-256: # %bb.0:
; RV64-BITS-256-NEXT: csrr a0, vlenb
-; RV64-BITS-256-NEXT: addiw a0, a0, -1
+; RV64-BITS-256-NEXT: addi a0, a0, -1
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; RV64-BITS-256-NEXT: vid.v v9
; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0
@@ -903,7 +903,7 @@ define <vscale x 8 x i8> @reverse_nxv8i8(<vscale x 8 x i8> %a) {
; RV64-BITS-512-LABEL: reverse_nxv8i8:
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: csrr a0, vlenb
-; RV64-BITS-512-NEXT: addiw a0, a0, -1
+; RV64-BITS-512-NEXT: addi a0, a0, -1
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; RV64-BITS-512-NEXT: vid.v v9
; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0
@@ -956,7 +956,7 @@ define <vscale x 16 x i8> @reverse_nxv16i8(<vscale x 16 x i8> %a) {
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1
-; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v12
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v12, v12, a0
@@ -969,7 +969,7 @@ define <vscale x 16 x i8> @reverse_nxv16i8(<vscale x 16 x i8> %a) {
; RV64-BITS-256: # %bb.0:
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: slli a0, a0, 1
-; RV64-BITS-256-NEXT: addiw a0, a0, -1
+; RV64-BITS-256-NEXT: addi a0, a0, -1
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; RV64-BITS-256-NEXT: vid.v v10
; RV64-BITS-256-NEXT: vrsub.vx v12, v10, a0
@@ -981,7 +981,7 @@ define <vscale x 16 x i8> @reverse_nxv16i8(<vscale x 16 x i8> %a) {
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: slli a0, a0, 1
-; RV64-BITS-512-NEXT: addiw a0, a0, -1
+; RV64-BITS-512-NEXT: addi a0, a0, -1
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; RV64-BITS-512-NEXT: vid.v v10
; RV64-BITS-512-NEXT: vrsub.vx v12, v10, a0
@@ -1034,7 +1034,7 @@ define <vscale x 32 x i8> @reverse_nxv32i8(<vscale x 32 x i8> %a) {
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2
-; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v16
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0
@@ -1047,7 +1047,7 @@ define <vscale x 32 x i8> @reverse_nxv32i8(<vscale x 32 x i8> %a) {
; RV64-BITS-256: # %bb.0:
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: slli a0, a0, 2
-; RV64-BITS-256-NEXT: addiw a0, a0, -1
+; RV64-BITS-256-NEXT: addi a0, a0, -1
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; RV64-BITS-256-NEXT: vid.v v12
; RV64-BITS-256-NEXT: vrsub.vx v16, v12, a0
@@ -1059,7 +1059,7 @@ define <vscale x 32 x i8> @reverse_nxv32i8(<vscale x 32 x i8> %a) {
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: slli a0, a0, 2
-; RV64-BITS-512-NEXT: addiw a0, a0, -1
+; RV64-BITS-512-NEXT: addi a0, a0, -1
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; RV64-BITS-512-NEXT: vid.v v12
; RV64-BITS-512-NEXT: vrsub.vx v16, v12, a0
@@ -1114,7 +1114,7 @@ define <vscale x 64 x i8> @reverse_nxv64i8(<vscale x 64 x i8> %a) {
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2
-; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v16
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v24, v16, a0
@@ -1128,7 +1128,7 @@ define <vscale x 64 x i8> @reverse_nxv64i8(<vscale x 64 x i8> %a) {
; RV64-BITS-256: # %bb.0:
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: slli a0, a0, 3
-; RV64-BITS-256-NEXT: addiw a0, a0, -1
+; RV64-BITS-256-NEXT: addi a0, a0, -1
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m8, ta, ma
; RV64-BITS-256-NEXT: vid.v v16
; RV64-BITS-256-NEXT: vrsub.vx v24, v16, a0
@@ -1140,7 +1140,7 @@ define <vscale x 64 x i8> @reverse_nxv64i8(<vscale x 64 x i8> %a) {
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: slli a0, a0, 2
-; RV64-BITS-512-NEXT: addiw a0, a0, -1
+; RV64-BITS-512-NEXT: addi a0, a0, -1
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; RV64-BITS-512-NEXT: vid.v v16
; RV64-BITS-512-NEXT: vrsub.vx v24, v16, a0
@@ -1153,305 +1153,175 @@ define <vscale x 64 x i8> @reverse_nxv64i8(<vscale x 64 x i8> %a) {
}
define <vscale x 1 x i16> @reverse_nxv1i16(<vscale x 1 x i16> %a) {
-; RV32-LABEL: reverse_nxv1i16:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: srli a0, a0, 3
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; RV32-NEXT: vid.v v9
-; RV32-NEXT: vrsub.vx v10, v9, a0
-; RV32-NEXT: vrgather.vv v9, v8, v10
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv1i16:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: srli a0, a0, 3
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; RV64-NEXT: vid.v v9
-; RV64-NEXT: vrsub.vx v10, v9, a0
-; RV64-NEXT: vrgather.vv v9, v8, v10
-; RV64-NEXT: vmv1r.v v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a0
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
%res = call <vscale x 1 x i16> @llvm.experimental.vector.reverse.nxv1i16(<vscale x 1 x i16> %a)
ret <vscale x 1 x i16> %res
}
define <vscale x 2 x i16> @reverse_nxv2i16(<vscale x 2 x i16> %a) {
-; RV32-LABEL: reverse_nxv2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: srli a0, a0, 2
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; RV32-NEXT: vid.v v9
-; RV32-NEXT: vrsub.vx v10, v9, a0
-; RV32-NEXT: vrgather.vv v9, v8, v10
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: srli a0, a0, 2
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; RV64-NEXT: vid.v v9
-; RV64-NEXT: vrsub.vx v10, v9, a0
-; RV64-NEXT: vrgather.vv v9, v8, v10
-; RV64-NEXT: vmv1r.v v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a0
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
%res = call <vscale x 2 x i16> @llvm.experimental.vector.reverse.nxv2i16(<vscale x 2 x i16> %a)
ret <vscale x 2 x i16> %res
}
define <vscale x 4 x i16> @reverse_nxv4i16(<vscale x 4 x i16> %a) {
-; RV32-LABEL: reverse_nxv4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: srli a0, a0, 1
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; RV32-NEXT: vid.v v9
-; RV32-NEXT: vrsub.vx v10, v9, a0
-; RV32-NEXT: vrgather.vv v9, v8, v10
-; RV32-NEXT: vmv.v.v v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; RV64-NEXT: vid.v v9
-; RV64-NEXT: vrsub.vx v10, v9, a0
-; RV64-NEXT: vrgather.vv v9, v8, v10
-; RV64-NEXT: vmv.v.v v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a0
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
%res = call <vscale x 4 x i16> @llvm.experimental.vector.reverse.nxv4i16(<vscale x 4 x i16> %a)
ret <vscale x 4 x i16> %res
}
define <vscale x 8 x i16> @reverse_nxv8i16(<vscale x 8 x i16> %a) {
-; RV32-LABEL: reverse_nxv8i16:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV32-NEXT: vid.v v10
-; RV32-NEXT: vrsub.vx v12, v10, a0
-; RV32-NEXT: vrgather.vv v10, v8, v12
-; RV32-NEXT: vmv.v.v v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV64-NEXT: vid.v v10
-; RV64-NEXT: vrsub.vx v12, v10, a0
-; RV64-NEXT: vrgather.vv v10, v8, v12
-; RV64-NEXT: vmv.v.v v8, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v12, v10, a0
+; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
%res = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> %a)
ret <vscale x 8 x i16> %res
}
define <vscale x 16 x i16> @reverse_nxv16i16(<vscale x 16 x i16> %a) {
-; RV32-LABEL: reverse_nxv16i16:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; RV32-NEXT: vid.v v12
-; RV32-NEXT: vrsub.vx v16, v12, a0
-; RV32-NEXT: vrgather.vv v12, v8, v16
-; RV32-NEXT: vmv.v.v v8, v12
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv16i16:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 1
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; RV64-NEXT: vid.v v12
-; RV64-NEXT: vrsub.vx v16, v12, a0
-; RV64-NEXT: vrgather.vv v12, v8, v16
-; RV64-NEXT: vmv.v.v v8, v12
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vid.v v12
+; CHECK-NEXT: vrsub.vx v16, v12, a0
+; CHECK-NEXT: vrgather.vv v12, v8, v16
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
%res = call <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16> %a)
ret <vscale x 16 x i16> %res
}
define <vscale x 32 x i16> @reverse_nxv32i16(<vscale x 32 x i16> %a) {
-; RV32-LABEL: reverse_nxv32i16:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 2
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; RV32-NEXT: vid.v v16
-; RV32-NEXT: vrsub.vx v24, v16, a0
-; RV32-NEXT: vrgather.vv v16, v8, v24
-; RV32-NEXT: vmv.v.v v8, v16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv32i16:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 2
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; RV64-NEXT: vid.v v16
-; RV64-NEXT: vrsub.vx v24, v16, a0
-; RV64-NEXT: vrgather.vv v16, v8, v24
-; RV64-NEXT: vmv.v.v v8, v16
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vrsub.vx v24, v16, a0
+; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
%res = call <vscale x 32 x i16> @llvm.experimental.vector.reverse.nxv32i16(<vscale x 32 x i16> %a)
ret <vscale x 32 x i16> %res
}
define <vscale x 1 x i32> @reverse_nxv1i32(<vscale x 1 x i32> %a) {
-; RV32-LABEL: reverse_nxv1i32:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: srli a0, a0, 3
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV32-NEXT: vid.v v9
-; RV32-NEXT: vrsub.vx v10, v9, a0
-; RV32-NEXT: vrgather.vv v9, v8, v10
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv1i32:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: srli a0, a0, 3
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT: vid.v v9
-; RV64-NEXT: vrsub.vx v10, v9, a0
-; RV64-NEXT: vrgather.vv v9, v8, v10
-; RV64-NEXT: vmv1r.v v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a0
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
%res = call <vscale x 1 x i32> @llvm.experimental.vector.reverse.nxv1i32(<vscale x 1 x i32> %a)
ret <vscale x 1 x i32> %res
}
define <vscale x 2 x i32> @reverse_nxv2i32(<vscale x 2 x i32> %a) {
-; RV32-LABEL: reverse_nxv2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: srli a0, a0, 2
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT: vid.v v9
-; RV32-NEXT: vrsub.vx v10, v9, a0
-; RV32-NEXT: vrgather.vv v9, v8, v10
-; RV32-NEXT: vmv.v.v v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: srli a0, a0, 2
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT: vid.v v9
-; RV64-NEXT: vrsub.vx v10, v9, a0
-; RV64-NEXT: vrgather.vv v9, v8, v10
-; RV64-NEXT: vmv.v.v v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a0
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
%res = call <vscale x 2 x i32> @llvm.experimental.vector.reverse.nxv2i32(<vscale x 2 x i32> %a)
ret <vscale x 2 x i32> %res
}
define <vscale x 4 x i32> @reverse_nxv4i32(<vscale x 4 x i32> %a) {
-; RV32-LABEL: reverse_nxv4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: srli a0, a0, 1
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT: vid.v v10
-; RV32-NEXT: vrsub.vx v12, v10, a0
-; RV32-NEXT: vrgather.vv v10, v8, v12
-; RV32-NEXT: vmv.v.v v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT: vid.v v10
-; RV64-NEXT: vrsub.vx v12, v10, a0
-; RV64-NEXT: vrgather.vv v10, v8, v12
-; RV64-NEXT: vmv.v.v v8, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v12, v10, a0
+; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
%res = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
ret <vscale x 4 x i32> %res
}
define <vscale x 8 x i32> @reverse_nxv8i32(<vscale x 8 x i32> %a) {
-; RV32-LABEL: reverse_nxv8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vid.v v12
-; RV32-NEXT: vrsub.vx v16, v12, a0
-; RV32-NEXT: vrgather.vv v12, v8, v16
-; RV32-NEXT: vmv.v.v v8, v12
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT: vid.v v12
-; RV64-NEXT: vrsub.vx v16, v12, a0
-; RV64-NEXT: vrgather.vv v12, v8, v16
-; RV64-NEXT: vmv.v.v v8, v12
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vid.v v12
+; CHECK-NEXT: vrsub.vx v16, v12, a0
+; CHECK-NEXT: vrgather.vv v12, v8, v16
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
%res = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> %a)
ret <vscale x 8 x i32> %res
}
define <vscale x 16 x i32> @reverse_nxv16i32(<vscale x 16 x i32> %a) {
-; RV32-LABEL: reverse_nxv16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT: vid.v v16
-; RV32-NEXT: vrsub.vx v24, v16, a0
-; RV32-NEXT: vrgather.vv v16, v8, v24
-; RV32-NEXT: vmv.v.v v8, v16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 1
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV64-NEXT: vid.v v16
-; RV64-NEXT: vrsub.vx v24, v16, a0
-; RV64-NEXT: vrgather.vv v16, v8, v24
-; RV64-NEXT: vmv.v.v v8, v16
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vrsub.vx v24, v16, a0
+; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
%res = call <vscale x 16 x i32> @llvm.experimental.vector.reverse.nxv16i32(<vscale x 16 x i32> %a)
ret <vscale x 16 x i32> %res
}
@@ -1524,305 +1394,175 @@ define <vscale x 8 x i64> @reverse_nxv8i64(<vscale x 8 x i64> %a) {
;
define <vscale x 1 x half> @reverse_nxv1f16(<vscale x 1 x half> %a) {
-; RV32-LABEL: reverse_nxv1f16:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: srli a0, a0, 3
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; RV32-NEXT: vid.v v9
-; RV32-NEXT: vrsub.vx v10, v9, a0
-; RV32-NEXT: vrgather.vv v9, v8, v10
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv1f16:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: srli a0, a0, 3
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; RV64-NEXT: vid.v v9
-; RV64-NEXT: vrsub.vx v10, v9, a0
-; RV64-NEXT: vrgather.vv v9, v8, v10
-; RV64-NEXT: vmv1r.v v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv1f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a0
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
%res = call <vscale x 1 x half> @llvm.experimental.vector.reverse.nxv1f16(<vscale x 1 x half> %a)
ret <vscale x 1 x half> %res
}
define <vscale x 2 x half> @reverse_nxv2f16(<vscale x 2 x half> %a) {
-; RV32-LABEL: reverse_nxv2f16:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: srli a0, a0, 2
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; RV32-NEXT: vid.v v9
-; RV32-NEXT: vrsub.vx v10, v9, a0
-; RV32-NEXT: vrgather.vv v9, v8, v10
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv2f16:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: srli a0, a0, 2
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; RV64-NEXT: vid.v v9
-; RV64-NEXT: vrsub.vx v10, v9, a0
-; RV64-NEXT: vrgather.vv v9, v8, v10
-; RV64-NEXT: vmv1r.v v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a0
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
%res = call <vscale x 2 x half> @llvm.experimental.vector.reverse.nxv2f16(<vscale x 2 x half> %a)
ret <vscale x 2 x half> %res
}
define <vscale x 4 x half> @reverse_nxv4f16(<vscale x 4 x half> %a) {
-; RV32-LABEL: reverse_nxv4f16:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: srli a0, a0, 1
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; RV32-NEXT: vid.v v9
-; RV32-NEXT: vrsub.vx v10, v9, a0
-; RV32-NEXT: vrgather.vv v9, v8, v10
-; RV32-NEXT: vmv.v.v v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv4f16:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; RV64-NEXT: vid.v v9
-; RV64-NEXT: vrsub.vx v10, v9, a0
-; RV64-NEXT: vrgather.vv v9, v8, v10
-; RV64-NEXT: vmv.v.v v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a0
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
%res = call <vscale x 4 x half> @llvm.experimental.vector.reverse.nxv4f16(<vscale x 4 x half> %a)
ret <vscale x 4 x half> %res
}
define <vscale x 8 x half> @reverse_nxv8f16(<vscale x 8 x half> %a) {
-; RV32-LABEL: reverse_nxv8f16:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV32-NEXT: vid.v v10
-; RV32-NEXT: vrsub.vx v12, v10, a0
-; RV32-NEXT: vrgather.vv v10, v8, v12
-; RV32-NEXT: vmv.v.v v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv8f16:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV64-NEXT: vid.v v10
-; RV64-NEXT: vrsub.vx v12, v10, a0
-; RV64-NEXT: vrgather.vv v10, v8, v12
-; RV64-NEXT: vmv.v.v v8, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v12, v10, a0
+; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> %a)
ret <vscale x 8 x half> %res
}
define <vscale x 16 x half> @reverse_nxv16f16(<vscale x 16 x half> %a) {
-; RV32-LABEL: reverse_nxv16f16:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; RV32-NEXT: vid.v v12
-; RV32-NEXT: vrsub.vx v16, v12, a0
-; RV32-NEXT: vrgather.vv v12, v8, v16
-; RV32-NEXT: vmv.v.v v8, v12
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv16f16:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 1
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; RV64-NEXT: vid.v v12
-; RV64-NEXT: vrsub.vx v16, v12, a0
-; RV64-NEXT: vrgather.vv v12, v8, v16
-; RV64-NEXT: vmv.v.v v8, v12
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vid.v v12
+; CHECK-NEXT: vrsub.vx v16, v12, a0
+; CHECK-NEXT: vrgather.vv v12, v8, v16
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
%res = call <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half> %a)
ret <vscale x 16 x half> %res
}
define <vscale x 32 x half> @reverse_nxv32f16(<vscale x 32 x half> %a) {
-; RV32-LABEL: reverse_nxv32f16:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 2
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; RV32-NEXT: vid.v v16
-; RV32-NEXT: vrsub.vx v24, v16, a0
-; RV32-NEXT: vrgather.vv v16, v8, v24
-; RV32-NEXT: vmv.v.v v8, v16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv32f16:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 2
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; RV64-NEXT: vid.v v16
-; RV64-NEXT: vrsub.vx v24, v16, a0
-; RV64-NEXT: vrgather.vv v16, v8, v24
-; RV64-NEXT: vmv.v.v v8, v16
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv32f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vrsub.vx v24, v16, a0
+; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
%res = call <vscale x 32 x half> @llvm.experimental.vector.reverse.nxv32f16(<vscale x 32 x half> %a)
ret <vscale x 32 x half> %res
}
define <vscale x 1 x float> @reverse_nxv1f32(<vscale x 1 x float> %a) {
-; RV32-LABEL: reverse_nxv1f32:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: srli a0, a0, 3
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV32-NEXT: vid.v v9
-; RV32-NEXT: vrsub.vx v10, v9, a0
-; RV32-NEXT: vrgather.vv v9, v8, v10
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv1f32:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: srli a0, a0, 3
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT: vid.v v9
-; RV64-NEXT: vrsub.vx v10, v9, a0
-; RV64-NEXT: vrgather.vv v9, v8, v10
-; RV64-NEXT: vmv1r.v v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a0
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
%res = call <vscale x 1 x float> @llvm.experimental.vector.reverse.nxv1f32(<vscale x 1 x float> %a)
ret <vscale x 1 x float> %res
}
define <vscale x 2 x float> @reverse_nxv2f32(<vscale x 2 x float> %a) {
-; RV32-LABEL: reverse_nxv2f32:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: srli a0, a0, 2
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT: vid.v v9
-; RV32-NEXT: vrsub.vx v10, v9, a0
-; RV32-NEXT: vrgather.vv v9, v8, v10
-; RV32-NEXT: vmv.v.v v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv2f32:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: srli a0, a0, 2
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT: vid.v v9
-; RV64-NEXT: vrsub.vx v10, v9, a0
-; RV64-NEXT: vrgather.vv v9, v8, v10
-; RV64-NEXT: vmv.v.v v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a0
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
%res = call <vscale x 2 x float> @llvm.experimental.vector.reverse.nxv2f32(<vscale x 2 x float> %a)
ret <vscale x 2 x float> %res
}
define <vscale x 4 x float> @reverse_nxv4f32(<vscale x 4 x float> %a) {
-; RV32-LABEL: reverse_nxv4f32:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: srli a0, a0, 1
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT: vid.v v10
-; RV32-NEXT: vrsub.vx v12, v10, a0
-; RV32-NEXT: vrgather.vv v10, v8, v12
-; RV32-NEXT: vmv.v.v v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv4f32:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT: vid.v v10
-; RV64-NEXT: vrsub.vx v12, v10, a0
-; RV64-NEXT: vrgather.vv v10, v8, v12
-; RV64-NEXT: vmv.v.v v8, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v12, v10, a0
+; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
%res = call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
ret <vscale x 4 x float> %res
}
define <vscale x 8 x float> @reverse_nxv8f32(<vscale x 8 x float> %a) {
-; RV32-LABEL: reverse_nxv8f32:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vid.v v12
-; RV32-NEXT: vrsub.vx v16, v12, a0
-; RV32-NEXT: vrgather.vv v12, v8, v16
-; RV32-NEXT: vmv.v.v v8, v12
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv8f32:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT: vid.v v12
-; RV64-NEXT: vrsub.vx v16, v12, a0
-; RV64-NEXT: vrgather.vv v12, v8, v16
-; RV64-NEXT: vmv.v.v v8, v12
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vid.v v12
+; CHECK-NEXT: vrsub.vx v16, v12, a0
+; CHECK-NEXT: vrgather.vv v12, v8, v16
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
%res = call <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float> %a)
ret <vscale x 8 x float> %res
}
define <vscale x 16 x float> @reverse_nxv16f32(<vscale x 16 x float> %a) {
-; RV32-LABEL: reverse_nxv16f32:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT: vid.v v16
-; RV32-NEXT: vrsub.vx v24, v16, a0
-; RV32-NEXT: vrgather.vv v16, v8, v24
-; RV32-NEXT: vmv.v.v v8, v16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: reverse_nxv16f32:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 1
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV64-NEXT: vid.v v16
-; RV64-NEXT: vrsub.vx v24, v16, a0
-; RV64-NEXT: vrgather.vv v16, v8, v24
-; RV64-NEXT: vmv.v.v v8, v16
-; RV64-NEXT: ret
+; CHECK-LABEL: reverse_nxv16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vrsub.vx v24, v16, a0
+; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
%res = call <vscale x 16 x float> @llvm.experimental.vector.reverse.nxv16f32(<vscale x 16 x float> %a)
ret <vscale x 16 x float> %res
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr61561.ll b/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
index 1478e8bfd3c658d..f27edd36116657e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
@@ -14,7 +14,7 @@ define <vscale x 4 x i8> @foo(ptr %p) {
; CHECK-NEXT: lui a0, 4
; CHECK-NEXT: vmv.v.x v10, a0
; CHECK-NEXT: lui a0, 1
-; CHECK-NEXT: addiw a0, a0, -361
+; CHECK-NEXT: addi a0, a0, -361
; CHECK-NEXT: vmacc.vx v10, a0, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v10, 15
diff --git a/llvm/test/CodeGen/RISCV/rvv/select-sra.ll b/llvm/test/CodeGen/RISCV/rvv/select-sra.ll
index 6804029eaad7053..10c74a0e81e7eab 100644
--- a/llvm/test/CodeGen/RISCV/rvv/select-sra.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/select-sra.ll
@@ -20,11 +20,11 @@ define <4 x i32> @vselect_of_consts(<4 x i1> %cc) {
; RV64-LABEL: vselect_of_consts:
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 284280
-; RV64-NEXT: addiw a0, a0, 291
+; RV64-NEXT: addi a0, a0, 291
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmv.v.x v8, a0
; RV64-NEXT: lui a0, 214376
-; RV64-NEXT: addiw a0, a0, -2030
+; RV64-NEXT: addi a0, a0, -2030
; RV64-NEXT: vmerge.vxm v8, v8, a0, v0
; RV64-NEXT: ret
%v = select <4 x i1> %cc, <4 x i32> <i32 878082066, i32 878082066, i32 878082066, i32 878082066>, <4 x i32> <i32 1164411171, i32 1164411171, i32 1164411171, i32 1164411171>
diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
index c91db751561c365..6984f2b3402a76b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
@@ -104,45 +104,25 @@ define <16 x i8> @v16i8(<16 x i8> %a) {
}
define <32 x i8> @v16i8_2(<16 x i8> %a, <16 x i8> %b) {
-; RV32-LABEL: v16i8_2:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI7_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI7_0)
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; RV32-NEXT: vle8.v v12, (a0)
-; RV32-NEXT: vmv1r.v v14, v9
-; RV32-NEXT: vrgather.vv v10, v8, v12
-; RV32-NEXT: vid.v v8
-; RV32-NEXT: vrsub.vi v8, v8, 15
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu
-; RV32-NEXT: vrgather.vv v10, v14, v8, v0.t
-; RV32-NEXT: vmv.v.v v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: v16i8_2:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI7_0)
-; RV64-NEXT: addi a0, a0, %lo(.LCPI7_0)
-; RV64-NEXT: li a1, 32
-; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; RV64-NEXT: vle8.v v12, (a0)
-; RV64-NEXT: vmv1r.v v14, v9
-; RV64-NEXT: vrgather.vv v10, v8, v12
-; RV64-NEXT: vid.v v8
-; RV64-NEXT: vrsub.vi v8, v8, 15
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vmv.s.x v0, a0
-; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, mu
-; RV64-NEXT: vrgather.vv v10, v14, v8, v0.t
-; RV64-NEXT: vmv.v.v v8, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: v16i8_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI7_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI7_0)
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vle8.v v12, (a0)
+; CHECK-NEXT: vmv1r.v v14, v9
+; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vrsub.vi v8, v8, 15
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
+; CHECK-NEXT: vrgather.vv v10, v14, v8, v0.t
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
%v32i8 = shufflevector <16 x i8> %a, <16 x i8> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <32 x i8> %v32i8
}
@@ -248,45 +228,25 @@ define <16 x i16> @v16i16(<16 x i16> %a) {
}
define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) {
-; RV32-LABEL: v16i16_2:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI15_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI15_0)
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; RV32-NEXT: vle16.v v20, (a0)
-; RV32-NEXT: vmv2r.v v16, v10
-; RV32-NEXT: vmv2r.v v12, v8
-; RV32-NEXT: vrgather.vv v8, v12, v20
-; RV32-NEXT: vid.v v12
-; RV32-NEXT: vrsub.vi v12, v12, 15
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, mu
-; RV32-NEXT: vrgather.vv v8, v16, v12, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: v16i16_2:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI15_0)
-; RV64-NEXT: addi a0, a0, %lo(.LCPI15_0)
-; RV64-NEXT: li a1, 32
-; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; RV64-NEXT: vle16.v v20, (a0)
-; RV64-NEXT: vmv2r.v v16, v10
-; RV64-NEXT: vmv2r.v v12, v8
-; RV64-NEXT: vrgather.vv v8, v12, v20
-; RV64-NEXT: vid.v v12
-; RV64-NEXT: vrsub.vi v12, v12, 15
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vmv.s.x v0, a0
-; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, mu
-; RV64-NEXT: vrgather.vv v8, v16, v12, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: v16i16_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI15_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI15_0)
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vle16.v v20, (a0)
+; CHECK-NEXT: vmv2r.v v16, v10
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vrgather.vv v8, v12, v20
+; CHECK-NEXT: vid.v v12
+; CHECK-NEXT: vrsub.vi v12, v12, 15
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT: vrgather.vv v8, v16, v12, v0.t
+; CHECK-NEXT: ret
%v32i16 = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <32 x i16> %v32i16
}
@@ -401,47 +361,26 @@ define <16 x i32> @v16i32(<16 x i32> %a) {
}
define <32 x i32> @v16i32_2(<16 x i32> %a, <16 x i32> %b) {
-; RV32-LABEL: v16i32_2:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI23_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI23_0)
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; RV32-NEXT: vle16.v v20, (a0)
-; RV32-NEXT: vmv4r.v v24, v12
-; RV32-NEXT: vmv4r.v v16, v8
-; RV32-NEXT: vrgatherei16.vv v8, v16, v20
-; RV32-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; RV32-NEXT: vid.v v16
-; RV32-NEXT: vrsub.vi v16, v16, 15
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu
-; RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: v16i32_2:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI23_0)
-; RV64-NEXT: addi a0, a0, %lo(.LCPI23_0)
-; RV64-NEXT: li a1, 32
-; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; RV64-NEXT: vle16.v v20, (a0)
-; RV64-NEXT: vmv4r.v v24, v12
-; RV64-NEXT: vmv4r.v v16, v8
-; RV64-NEXT: vrgatherei16.vv v8, v16, v20
-; RV64-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; RV64-NEXT: vid.v v16
-; RV64-NEXT: vrsub.vi v16, v16, 15
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vmv.s.x v0, a0
-; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, mu
-; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: v16i32_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI23_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI23_0)
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vle16.v v20, (a0)
+; CHECK-NEXT: vmv4r.v v24, v12
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vrgatherei16.vv v8, v16, v20
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vrsub.vi v16, v16, 15
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
+; CHECK-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
+; CHECK-NEXT: ret
%v32i32 = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <32 x i32> %v32i32
}
@@ -793,3 +732,6 @@ define <32 x i8> @v32i8(<32 x i8> %a) {
ret <32 x i8> %v32i8
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index 63a85b1f4dc74e4..f08bfce409305c8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -251,7 +251,7 @@ define void @sink_splat_mul_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB7_5
; CHECK-NEXT: .LBB7_2: # %vector.ph
-; CHECK-NEXT: addiw a2, a3, -1
+; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
@@ -342,7 +342,7 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB8_5
; CHECK-NEXT: .LBB8_2: # %vector.ph
-; CHECK-NEXT: addiw a2, a3, -1
+; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
@@ -433,7 +433,7 @@ define void @sink_splat_sub_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB9_5
; CHECK-NEXT: .LBB9_2: # %vector.ph
-; CHECK-NEXT: addiw a2, a3, -1
+; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
@@ -524,7 +524,7 @@ define void @sink_splat_rsub_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB10_5
; CHECK-NEXT: .LBB10_2: # %vector.ph
-; CHECK-NEXT: addiw a2, a3, -1
+; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
@@ -615,7 +615,7 @@ define void @sink_splat_and_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB11_5
; CHECK-NEXT: .LBB11_2: # %vector.ph
-; CHECK-NEXT: addiw a2, a3, -1
+; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
@@ -706,7 +706,7 @@ define void @sink_splat_or_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB12_5
; CHECK-NEXT: .LBB12_2: # %vector.ph
-; CHECK-NEXT: addiw a2, a3, -1
+; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
@@ -797,7 +797,7 @@ define void @sink_splat_xor_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB13_5
; CHECK-NEXT: .LBB13_2: # %vector.ph
-; CHECK-NEXT: addiw a2, a3, -1
+; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
@@ -990,7 +990,7 @@ define void @sink_splat_shl_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB17_5
; CHECK-NEXT: .LBB17_2: # %vector.ph
-; CHECK-NEXT: addiw a2, a3, -1
+; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
@@ -1081,7 +1081,7 @@ define void @sink_splat_lshr_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB18_5
; CHECK-NEXT: .LBB18_2: # %vector.ph
-; CHECK-NEXT: addiw a2, a3, -1
+; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
@@ -1172,7 +1172,7 @@ define void @sink_splat_ashr_scalable(ptr nocapture %a) {
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: j .LBB19_5
; CHECK-NEXT: .LBB19_2: # %vector.ph
-; CHECK-NEXT: addiw a1, a2, -1
+; CHECK-NEXT: addi a1, a2, -1
; CHECK-NEXT: andi a3, a1, 1024
; CHECK-NEXT: xori a1, a3, 1024
; CHECK-NEXT: slli a4, a4, 1
@@ -1467,7 +1467,7 @@ define void @sink_splat_fmul_scalable(ptr nocapture %a, float %x) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB26_5
; CHECK-NEXT: .LBB26_2: # %vector.ph
-; CHECK-NEXT: addiw a2, a3, -1
+; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
@@ -1557,7 +1557,7 @@ define void @sink_splat_fdiv_scalable(ptr nocapture %a, float %x) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB27_5
; CHECK-NEXT: .LBB27_2: # %vector.ph
-; CHECK-NEXT: addiw a2, a3, -1
+; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
@@ -1647,7 +1647,7 @@ define void @sink_splat_frdiv_scalable(ptr nocapture %a, float %x) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB28_5
; CHECK-NEXT: .LBB28_2: # %vector.ph
-; CHECK-NEXT: addiw a2, a3, -1
+; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
@@ -1737,7 +1737,7 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB29_5
; CHECK-NEXT: .LBB29_2: # %vector.ph
-; CHECK-NEXT: addiw a2, a3, -1
+; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
@@ -1827,7 +1827,7 @@ define void @sink_splat_fsub_scalable(ptr nocapture %a, float %x) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB30_5
; CHECK-NEXT: .LBB30_2: # %vector.ph
-; CHECK-NEXT: addiw a2, a3, -1
+; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
@@ -1917,7 +1917,7 @@ define void @sink_splat_frsub_scalable(ptr nocapture %a, float %x) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB31_5
; CHECK-NEXT: .LBB31_2: # %vector.ph
-; CHECK-NEXT: addiw a2, a3, -1
+; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
@@ -2083,7 +2083,7 @@ define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocap
; CHECK-NEXT: li a4, 0
; CHECK-NEXT: j .LBB34_5
; CHECK-NEXT: .LBB34_2: # %vector.ph
-; CHECK-NEXT: addiw a4, a3, -1
+; CHECK-NEXT: addi a4, a3, -1
; CHECK-NEXT: andi a5, a4, 1024
; CHECK-NEXT: xori a4, a5, 1024
; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, ma
@@ -2183,7 +2183,7 @@ define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noali
; CHECK-NEXT: li a4, 0
; CHECK-NEXT: j .LBB35_5
; CHECK-NEXT: .LBB35_2: # %vector.ph
-; CHECK-NEXT: addiw a4, a3, -1
+; CHECK-NEXT: addi a4, a3, -1
; CHECK-NEXT: andi a5, a4, 1024
; CHECK-NEXT: xori a4, a5, 1024
; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, ma
@@ -2496,7 +2496,7 @@ define void @sink_splat_udiv_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB42_5
; CHECK-NEXT: .LBB42_2: # %vector.ph
-; CHECK-NEXT: addiw a2, a3, -1
+; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
@@ -2587,7 +2587,7 @@ define void @sink_splat_sdiv_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB43_5
; CHECK-NEXT: .LBB43_2: # %vector.ph
-; CHECK-NEXT: addiw a2, a3, -1
+; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
@@ -2678,7 +2678,7 @@ define void @sink_splat_urem_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB44_5
; CHECK-NEXT: .LBB44_2: # %vector.ph
-; CHECK-NEXT: addiw a2, a3, -1
+; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
@@ -2769,7 +2769,7 @@ define void @sink_splat_srem_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB45_5
; CHECK-NEXT: .LBB45_2: # %vector.ph
-; CHECK-NEXT: addiw a2, a3, -1
+; CHECK-NEXT: addi a2, a3, -1
; CHECK-NEXT: andi a4, a2, 1024
; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll
index 56d98981947c3c0..b7fe722958bfb89 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll
@@ -32,7 +32,7 @@ define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmsle.vi v0, v8, -1
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: addiw a1, a0, -1
+; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: vsll.vv v10, v8, v9
; CHECK-NEXT: vsra.vv v9, v10, v9
; CHECK-NEXT: vmsne.vv v8, v8, v9
@@ -51,7 +51,7 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmsle.vi v0, v8, -1
; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addiw a1, a0, -1
+; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: vsll.vv v10, v8, v9
; CHECK-NEXT: vsra.vv v9, v10, v9
; CHECK-NEXT: vmsne.vv v8, v8, v9
@@ -114,7 +114,7 @@ define <vscale x 4 x i32> @vec_nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32>
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vmsle.vi v0, v8, -1
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: addiw a1, a0, -1
+; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: vsll.vv v12, v8, v10
; CHECK-NEXT: vsra.vv v14, v12, v10
; CHECK-NEXT: vmsne.vv v10, v8, v14
@@ -133,7 +133,7 @@ define <vscale x 8 x i16> @vec_nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16>
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT: vmsle.vi v0, v8, -1
; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addiw a1, a0, -1
+; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: vsll.vv v12, v8, v10
; CHECK-NEXT: vsra.vv v14, v12, v10
; CHECK-NEXT: vmsne.vv v10, v8, v14
diff --git a/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll b/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll
index 839538039c70b97..bfbbb4b4067f841 100644
--- a/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll
@@ -22,14 +22,14 @@ define <vscale x 1 x i16> @test_urem_vec_even_divisor_eq0(<vscale x 1 x i16> %x)
; RV64-LABEL: test_urem_vec_even_divisor_eq0:
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 1048571
-; RV64-NEXT: addiw a0, a0, -1365
+; RV64-NEXT: addi a0, a0, -1365
; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; RV64-NEXT: vmul.vx v8, v8, a0
; RV64-NEXT: vsll.vi v9, v8, 15
; RV64-NEXT: vsrl.vi v8, v8, 1
; RV64-NEXT: vor.vv v8, v8, v9
; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, -1366
+; RV64-NEXT: addi a0, a0, -1366
; RV64-NEXT: vmsgtu.vx v0, v8, a0
; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: vmerge.vim v8, v8, -1, v0
@@ -61,11 +61,11 @@ define <vscale x 1 x i16> @test_urem_vec_odd_divisor_eq0(<vscale x 1 x i16> %x)
; RV64-LABEL: test_urem_vec_odd_divisor_eq0:
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 1048573
-; RV64-NEXT: addiw a0, a0, -819
+; RV64-NEXT: addi a0, a0, -819
; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; RV64-NEXT: vmul.vx v8, v8, a0
; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
+; RV64-NEXT: addi a0, a0, 819
; RV64-NEXT: vmsgtu.vx v0, v8, a0
; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: vmerge.vim v8, v8, -1, v0
@@ -105,13 +105,13 @@ define <vscale x 1 x i16> @test_urem_vec_even_divisor_eq1(<vscale x 1 x i16> %x)
; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; RV64-NEXT: vsub.vx v8, v8, a0
; RV64-NEXT: lui a0, 1048571
-; RV64-NEXT: addiw a0, a0, -1365
+; RV64-NEXT: addi a0, a0, -1365
; RV64-NEXT: vmul.vx v8, v8, a0
; RV64-NEXT: vsll.vi v9, v8, 15
; RV64-NEXT: vsrl.vi v8, v8, 1
; RV64-NEXT: vor.vv v8, v8, v9
; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, -1366
+; RV64-NEXT: addi a0, a0, -1366
; RV64-NEXT: vmsgtu.vx v0, v8, a0
; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: vmerge.vim v8, v8, -1, v0
@@ -148,10 +148,10 @@ define <vscale x 1 x i16> @test_urem_vec_odd_divisor_eq1(<vscale x 1 x i16> %x)
; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; RV64-NEXT: vsub.vx v8, v8, a0
; RV64-NEXT: lui a0, 1048573
-; RV64-NEXT: addiw a0, a0, -819
+; RV64-NEXT: addi a0, a0, -819
; RV64-NEXT: vmul.vx v8, v8, a0
; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 818
+; RV64-NEXT: addi a0, a0, 818
; RV64-NEXT: vmsgtu.vx v0, v8, a0
; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: vmerge.vim v8, v8, -1, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
index 5e32e551ba0dd80..0028ac88cc4fed5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
@@ -324,27 +324,16 @@ define <vscale x 1 x i16> @vdiv_vx_nxv1i16(<vscale x 1 x i16> %va, i16 signext %
}
define <vscale x 1 x i16> @vdiv_vi_nxv1i16_0(<vscale x 1 x i16> %va) {
-; RV32-LABEL: vdiv_vi_nxv1i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 1048571
-; RV32-NEXT: addi a0, a0, 1755
-; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; RV32-NEXT: vmulh.vx v8, v8, a0
-; RV32-NEXT: vsra.vi v8, v8, 1
-; RV32-NEXT: vsrl.vi v9, v8, 15
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdiv_vi_nxv1i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 1048571
-; RV64-NEXT: addiw a0, a0, 1755
-; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; RV64-NEXT: vmulh.vx v8, v8, a0
-; RV64-NEXT: vsra.vi v8, v8, 1
-; RV64-NEXT: vsrl.vi v9, v8, 15
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vdiv_vi_nxv1i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 1048571
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmulh.vx v8, v8, a0
+; CHECK-NEXT: vsra.vi v8, v8, 1
+; CHECK-NEXT: vsrl.vi v9, v8, 15
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
%vc = sdiv <vscale x 1 x i16> %va, %splat
@@ -374,27 +363,16 @@ define <vscale x 2 x i16> @vdiv_vx_nxv2i16(<vscale x 2 x i16> %va, i16 signext %
}
define <vscale x 2 x i16> @vdiv_vi_nxv2i16_0(<vscale x 2 x i16> %va) {
-; RV32-LABEL: vdiv_vi_nxv2i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 1048571
-; RV32-NEXT: addi a0, a0, 1755
-; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; RV32-NEXT: vmulh.vx v8, v8, a0
-; RV32-NEXT: vsra.vi v8, v8, 1
-; RV32-NEXT: vsrl.vi v9, v8, 15
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdiv_vi_nxv2i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 1048571
-; RV64-NEXT: addiw a0, a0, 1755
-; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; RV64-NEXT: vmulh.vx v8, v8, a0
-; RV64-NEXT: vsra.vi v8, v8, 1
-; RV64-NEXT: vsrl.vi v9, v8, 15
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vdiv_vi_nxv2i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 1048571
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmulh.vx v8, v8, a0
+; CHECK-NEXT: vsra.vi v8, v8, 1
+; CHECK-NEXT: vsrl.vi v9, v8, 15
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
%vc = sdiv <vscale x 2 x i16> %va, %splat
@@ -424,27 +402,16 @@ define <vscale x 4 x i16> @vdiv_vx_nxv4i16(<vscale x 4 x i16> %va, i16 signext %
}
define <vscale x 4 x i16> @vdiv_vi_nxv4i16_0(<vscale x 4 x i16> %va) {
-; RV32-LABEL: vdiv_vi_nxv4i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 1048571
-; RV32-NEXT: addi a0, a0, 1755
-; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; RV32-NEXT: vmulh.vx v8, v8, a0
-; RV32-NEXT: vsra.vi v8, v8, 1
-; RV32-NEXT: vsrl.vi v9, v8, 15
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdiv_vi_nxv4i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 1048571
-; RV64-NEXT: addiw a0, a0, 1755
-; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; RV64-NEXT: vmulh.vx v8, v8, a0
-; RV64-NEXT: vsra.vi v8, v8, 1
-; RV64-NEXT: vsrl.vi v9, v8, 15
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vdiv_vi_nxv4i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 1048571
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmulh.vx v8, v8, a0
+; CHECK-NEXT: vsra.vi v8, v8, 1
+; CHECK-NEXT: vsrl.vi v9, v8, 15
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
%vc = sdiv <vscale x 4 x i16> %va, %splat
@@ -474,27 +441,16 @@ define <vscale x 8 x i16> @vdiv_vx_nxv8i16(<vscale x 8 x i16> %va, i16 signext %
}
define <vscale x 8 x i16> @vdiv_vi_nxv8i16_0(<vscale x 8 x i16> %va) {
-; RV32-LABEL: vdiv_vi_nxv8i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 1048571
-; RV32-NEXT: addi a0, a0, 1755
-; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV32-NEXT: vmulh.vx v8, v8, a0
-; RV32-NEXT: vsra.vi v8, v8, 1
-; RV32-NEXT: vsrl.vi v10, v8, 15
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdiv_vi_nxv8i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 1048571
-; RV64-NEXT: addiw a0, a0, 1755
-; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV64-NEXT: vmulh.vx v8, v8, a0
-; RV64-NEXT: vsra.vi v8, v8, 1
-; RV64-NEXT: vsrl.vi v10, v8, 15
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: vdiv_vi_nxv8i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 1048571
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmulh.vx v8, v8, a0
+; CHECK-NEXT: vsra.vi v8, v8, 1
+; CHECK-NEXT: vsrl.vi v10, v8, 15
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
%vc = sdiv <vscale x 8 x i16> %va, %splat
@@ -524,27 +480,16 @@ define <vscale x 16 x i16> @vdiv_vx_nxv16i16(<vscale x 16 x i16> %va, i16 signex
}
define <vscale x 16 x i16> @vdiv_vi_nxv16i16_0(<vscale x 16 x i16> %va) {
-; RV32-LABEL: vdiv_vi_nxv16i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 1048571
-; RV32-NEXT: addi a0, a0, 1755
-; RV32-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; RV32-NEXT: vmulh.vx v8, v8, a0
-; RV32-NEXT: vsra.vi v8, v8, 1
-; RV32-NEXT: vsrl.vi v12, v8, 15
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdiv_vi_nxv16i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 1048571
-; RV64-NEXT: addiw a0, a0, 1755
-; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; RV64-NEXT: vmulh.vx v8, v8, a0
-; RV64-NEXT: vsra.vi v8, v8, 1
-; RV64-NEXT: vsrl.vi v12, v8, 15
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: ret
+; CHECK-LABEL: vdiv_vi_nxv16i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 1048571
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmulh.vx v8, v8, a0
+; CHECK-NEXT: vsra.vi v8, v8, 1
+; CHECK-NEXT: vsrl.vi v12, v8, 15
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
%vc = sdiv <vscale x 16 x i16> %va, %splat
@@ -574,27 +519,16 @@ define <vscale x 32 x i16> @vdiv_vx_nxv32i16(<vscale x 32 x i16> %va, i16 signex
}
define <vscale x 32 x i16> @vdiv_vi_nxv32i16_0(<vscale x 32 x i16> %va) {
-; RV32-LABEL: vdiv_vi_nxv32i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 1048571
-; RV32-NEXT: addi a0, a0, 1755
-; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; RV32-NEXT: vmulh.vx v8, v8, a0
-; RV32-NEXT: vsra.vi v8, v8, 1
-; RV32-NEXT: vsrl.vi v16, v8, 15
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdiv_vi_nxv32i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 1048571
-; RV64-NEXT: addiw a0, a0, 1755
-; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; RV64-NEXT: vmulh.vx v8, v8, a0
-; RV64-NEXT: vsra.vi v8, v8, 1
-; RV64-NEXT: vsrl.vi v16, v8, 15
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: ret
+; CHECK-LABEL: vdiv_vi_nxv32i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 1048571
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmulh.vx v8, v8, a0
+; CHECK-NEXT: vsra.vi v8, v8, 1
+; CHECK-NEXT: vsrl.vi v16, v8, 15
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: ret
%head = insertelement <vscale x 32 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
%vc = sdiv <vscale x 32 x i16> %va, %splat
@@ -639,7 +573,7 @@ define <vscale x 1 x i32> @vdiv_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
; RV64-LABEL: vdiv_vi_nxv1i32_0:
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addiw a0, a0, -1171
+; RV64-NEXT: addi a0, a0, -1171
; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
; RV64-NEXT: vmulh.vx v9, v8, a0
; RV64-NEXT: vsub.vv v8, v9, v8
@@ -691,7 +625,7 @@ define <vscale x 2 x i32> @vdiv_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
; RV64-LABEL: vdiv_vi_nxv2i32_0:
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addiw a0, a0, -1171
+; RV64-NEXT: addi a0, a0, -1171
; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV64-NEXT: vmulh.vx v9, v8, a0
; RV64-NEXT: vsub.vv v8, v9, v8
@@ -743,7 +677,7 @@ define <vscale x 4 x i32> @vdiv_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
; RV64-LABEL: vdiv_vi_nxv4i32_0:
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addiw a0, a0, -1171
+; RV64-NEXT: addi a0, a0, -1171
; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
; RV64-NEXT: vmulh.vx v10, v8, a0
; RV64-NEXT: vsub.vv v8, v10, v8
@@ -795,7 +729,7 @@ define <vscale x 8 x i32> @vdiv_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
; RV64-LABEL: vdiv_vi_nxv8i32_0:
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addiw a0, a0, -1171
+; RV64-NEXT: addi a0, a0, -1171
; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV64-NEXT: vmulh.vx v12, v8, a0
; RV64-NEXT: vsub.vv v8, v12, v8
@@ -847,7 +781,7 @@ define <vscale x 16 x i32> @vdiv_vi_nxv16i32_0(<vscale x 16 x i32> %va) {
; RV64-LABEL: vdiv_vi_nxv16i32_0:
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addiw a0, a0, -1171
+; RV64-NEXT: addi a0, a0, -1171
; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV64-NEXT: vmulh.vx v16, v8, a0
; RV64-NEXT: vsub.vv v8, v16, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
index 90b8a7fa70b32ef..c505cb3d1bbd4ac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
@@ -303,23 +303,14 @@ define <vscale x 1 x i16> @vdivu_vx_nxv1i16(<vscale x 1 x i16> %va, i16 signext
}
define <vscale x 1 x i16> @vdivu_vi_nxv1i16_0(<vscale x 1 x i16> %va) {
-; RV32-LABEL: vdivu_vi_nxv1i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 2
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; RV32-NEXT: vmulhu.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 13
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdivu_vi_nxv1i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 2
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 13
-; RV64-NEXT: ret
+; CHECK-LABEL: vdivu_vi_nxv1i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 2
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmulhu.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 13
+; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
%vc = udiv <vscale x 1 x i16> %va, %splat
@@ -349,23 +340,14 @@ define <vscale x 2 x i16> @vdivu_vx_nxv2i16(<vscale x 2 x i16> %va, i16 signext
}
define <vscale x 2 x i16> @vdivu_vi_nxv2i16_0(<vscale x 2 x i16> %va) {
-; RV32-LABEL: vdivu_vi_nxv2i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 2
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; RV32-NEXT: vmulhu.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 13
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdivu_vi_nxv2i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 2
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 13
-; RV64-NEXT: ret
+; CHECK-LABEL: vdivu_vi_nxv2i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 2
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmulhu.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 13
+; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
%vc = udiv <vscale x 2 x i16> %va, %splat
@@ -395,23 +377,14 @@ define <vscale x 4 x i16> @vdivu_vx_nxv4i16(<vscale x 4 x i16> %va, i16 signext
}
define <vscale x 4 x i16> @vdivu_vi_nxv4i16_0(<vscale x 4 x i16> %va) {
-; RV32-LABEL: vdivu_vi_nxv4i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 2
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; RV32-NEXT: vmulhu.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 13
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdivu_vi_nxv4i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 2
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 13
-; RV64-NEXT: ret
+; CHECK-LABEL: vdivu_vi_nxv4i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 2
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmulhu.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 13
+; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
%vc = udiv <vscale x 4 x i16> %va, %splat
@@ -441,23 +414,14 @@ define <vscale x 8 x i16> @vdivu_vx_nxv8i16(<vscale x 8 x i16> %va, i16 signext
}
define <vscale x 8 x i16> @vdivu_vi_nxv8i16_0(<vscale x 8 x i16> %va) {
-; RV32-LABEL: vdivu_vi_nxv8i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 2
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV32-NEXT: vmulhu.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 13
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdivu_vi_nxv8i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 2
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 13
-; RV64-NEXT: ret
+; CHECK-LABEL: vdivu_vi_nxv8i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 2
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmulhu.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 13
+; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
%vc = udiv <vscale x 8 x i16> %va, %splat
@@ -487,23 +451,14 @@ define <vscale x 16 x i16> @vdivu_vx_nxv16i16(<vscale x 16 x i16> %va, i16 signe
}
define <vscale x 16 x i16> @vdivu_vi_nxv16i16_0(<vscale x 16 x i16> %va) {
-; RV32-LABEL: vdivu_vi_nxv16i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 2
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; RV32-NEXT: vmulhu.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 13
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdivu_vi_nxv16i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 2
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 13
-; RV64-NEXT: ret
+; CHECK-LABEL: vdivu_vi_nxv16i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 2
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmulhu.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 13
+; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
%vc = udiv <vscale x 16 x i16> %va, %splat
@@ -533,23 +488,14 @@ define <vscale x 32 x i16> @vdivu_vx_nxv32i16(<vscale x 32 x i16> %va, i16 signe
}
define <vscale x 32 x i16> @vdivu_vi_nxv32i16_0(<vscale x 32 x i16> %va) {
-; RV32-LABEL: vdivu_vi_nxv32i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 2
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; RV32-NEXT: vmulhu.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 13
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdivu_vi_nxv32i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 2
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 13
-; RV64-NEXT: ret
+; CHECK-LABEL: vdivu_vi_nxv32i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 2
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmulhu.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 13
+; CHECK-NEXT: ret
%head = insertelement <vscale x 32 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
%vc = udiv <vscale x 32 x i16> %va, %splat
@@ -579,23 +525,14 @@ define <vscale x 1 x i32> @vdivu_vx_nxv1i32(<vscale x 1 x i32> %va, i32 signext
}
define <vscale x 1 x i32> @vdivu_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
-; RV32-LABEL: vdivu_vi_nxv1i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 131072
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV32-NEXT: vmulhu.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 29
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdivu_vi_nxv1i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 131072
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 29
-; RV64-NEXT: ret
+; CHECK-LABEL: vdivu_vi_nxv1i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 131072
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmulhu.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 29
+; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i32> poison, i32 -7, i32 0
%splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
%vc = udiv <vscale x 1 x i32> %va, %splat
@@ -625,23 +562,14 @@ define <vscale x 2 x i32> @vdivu_vx_nxv2i32(<vscale x 2 x i32> %va, i32 signext
}
define <vscale x 2 x i32> @vdivu_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
-; RV32-LABEL: vdivu_vi_nxv2i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 131072
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT: vmulhu.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 29
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdivu_vi_nxv2i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 131072
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 29
-; RV64-NEXT: ret
+; CHECK-LABEL: vdivu_vi_nxv2i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 131072
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmulhu.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 29
+; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i32> poison, i32 -7, i32 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
%vc = udiv <vscale x 2 x i32> %va, %splat
@@ -671,23 +599,14 @@ define <vscale x 4 x i32> @vdivu_vx_nxv4i32(<vscale x 4 x i32> %va, i32 signext
}
define <vscale x 4 x i32> @vdivu_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
-; RV32-LABEL: vdivu_vi_nxv4i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 131072
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT: vmulhu.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 29
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdivu_vi_nxv4i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 131072
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 29
-; RV64-NEXT: ret
+; CHECK-LABEL: vdivu_vi_nxv4i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 131072
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmulhu.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 29
+; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i32> poison, i32 -7, i32 0
%splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
%vc = udiv <vscale x 4 x i32> %va, %splat
@@ -717,23 +636,14 @@ define <vscale x 8 x i32> @vdivu_vx_nxv8i32(<vscale x 8 x i32> %va, i32 signext
}
define <vscale x 8 x i32> @vdivu_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
-; RV32-LABEL: vdivu_vi_nxv8i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 131072
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vmulhu.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 29
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdivu_vi_nxv8i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 131072
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 29
-; RV64-NEXT: ret
+; CHECK-LABEL: vdivu_vi_nxv8i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 131072
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmulhu.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 29
+; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i32> poison, i32 -7, i32 0
%splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
%vc = udiv <vscale x 8 x i32> %va, %splat
@@ -763,23 +673,14 @@ define <vscale x 16 x i32> @vdivu_vx_nxv16i32(<vscale x 16 x i32> %va, i32 signe
}
define <vscale x 16 x i32> @vdivu_vi_nxv16i32_0(<vscale x 16 x i32> %va) {
-; RV32-LABEL: vdivu_vi_nxv16i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 131072
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT: vmulhu.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 29
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdivu_vi_nxv16i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 131072
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 29
-; RV64-NEXT: ret
+; CHECK-LABEL: vdivu_vi_nxv16i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 131072
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmulhu.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 29
+; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i32> poison, i32 -7, i32 0
%splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
%vc = udiv <vscale x 16 x i32> %va, %splat
@@ -1231,29 +1132,17 @@ define <vscale x 8 x i32> @vdivu_vx_mask_nxv8i32(<vscale x 8 x i32> %va, i32 sig
}
define <vscale x 8 x i32> @vdivu_vi_mask_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %mask) {
-; RV32-LABEL: vdivu_vi_mask_nxv8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 149797
-; RV32-NEXT: addi a0, a0, -1755
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
-; RV32-NEXT: vmulhu.vx v12, v8, a0
-; RV32-NEXT: vsub.vv v16, v8, v12
-; RV32-NEXT: vsrl.vi v16, v16, 1
-; RV32-NEXT: vadd.vv v12, v16, v12
-; RV32-NEXT: vsrl.vi v8, v12, 2, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vdivu_vi_mask_nxv8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 149797
-; RV64-NEXT: addiw a0, a0, -1755
-; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, mu
-; RV64-NEXT: vmulhu.vx v12, v8, a0
-; RV64-NEXT: vsub.vv v16, v8, v12
-; RV64-NEXT: vsrl.vi v16, v16, 1
-; RV64-NEXT: vadd.vv v12, v16, v12
-; RV64-NEXT: vsrl.vi v8, v12, 2, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vdivu_vi_mask_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 149797
+; CHECK-NEXT: addi a0, a0, -1755
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu
+; CHECK-NEXT: vmulhu.vx v12, v8, a0
+; CHECK-NEXT: vsub.vv v16, v8, v12
+; CHECK-NEXT: vsrl.vi v16, v16, 1
+; CHECK-NEXT: vadd.vv v12, v16, v12
+; CHECK-NEXT: vsrl.vi v8, v12, 2, v0.t
+; CHECK-NEXT: ret
%head1 = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
%one = shufflevector <vscale x 8 x i32> %head1, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
%head2 = insertelement <vscale x 8 x i32> poison, i32 7, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
index ff236d7def7d6e1..b8f2afd194e46d6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
@@ -89,35 +89,35 @@ define <8 x i32> @vector_interleave_v8i32_v4i32(<4 x i32> %a, <4 x i32> %b) {
}
define <4 x i64> @vector_interleave_v4i64_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; RV32-LABEL: vector_interleave_v4i64_v2i64:
-; RV32: # %bb.0:
-; RV32-NEXT: vmv1r.v v10, v9
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vslideup.vi v8, v10, 2
-; RV32-NEXT: lui a0, 12304
-; RV32-NEXT: addi a0, a0, 512
-; RV32-NEXT: vmv.s.x v10, a0
-; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v10
-; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT: vrgatherei16.vv v10, v8, v12
-; RV32-NEXT: vmv.v.v v8, v10
-; RV32-NEXT: ret
+; CHECK-LABEL: vector_interleave_v4i64_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 2
+; CHECK-NEXT: lui a0, 12304
+; CHECK-NEXT: addi a0, a0, 512
+; CHECK-NEXT: vmv.s.x v10, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsext.vf2 v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
;
-; RV64-LABEL: vector_interleave_v4i64_v2i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vmv1r.v v10, v9
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vslideup.vi v8, v10, 2
-; RV64-NEXT: lui a0, 12304
-; RV64-NEXT: addiw a0, a0, 512
-; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT: vsext.vf2 v12, v10
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vrgatherei16.vv v10, v8, v12
-; RV64-NEXT: vmv.v.v v8, v10
-; RV64-NEXT: ret
+; ZVBB-LABEL: vector_interleave_v4i64_v2i64:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vmv1r.v v10, v9
+; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVBB-NEXT: vslideup.vi v8, v10, 2
+; ZVBB-NEXT: lui a0, 12304
+; ZVBB-NEXT: addi a0, a0, 512
+; ZVBB-NEXT: vmv.s.x v10, a0
+; ZVBB-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsext.vf2 v12, v10
+; ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; ZVBB-NEXT: vrgatherei16.vv v10, v8, v12
+; ZVBB-NEXT: vmv.v.v v8, v10
+; ZVBB-NEXT: ret
%res = call <4 x i64> @llvm.experimental.vector.interleave2.v4i64(<2 x i64> %a, <2 x i64> %b)
ret <4 x i64> %res
}
@@ -237,35 +237,35 @@ define <8 x float> @vector_interleave_v8f32_v4f32(<4 x float> %a, <4 x float> %b
}
define <4 x double> @vector_interleave_v4f64_v2f64(<2 x double> %a, <2 x double> %b) {
-; RV32-LABEL: vector_interleave_v4f64_v2f64:
-; RV32: # %bb.0:
-; RV32-NEXT: vmv1r.v v10, v9
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vslideup.vi v8, v10, 2
-; RV32-NEXT: lui a0, 12304
-; RV32-NEXT: addi a0, a0, 512
-; RV32-NEXT: vmv.s.x v10, a0
-; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v10
-; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT: vrgatherei16.vv v10, v8, v12
-; RV32-NEXT: vmv.v.v v8, v10
-; RV32-NEXT: ret
+; CHECK-LABEL: vector_interleave_v4f64_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 2
+; CHECK-NEXT: lui a0, 12304
+; CHECK-NEXT: addi a0, a0, 512
+; CHECK-NEXT: vmv.s.x v10, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsext.vf2 v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
;
-; RV64-LABEL: vector_interleave_v4f64_v2f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vmv1r.v v10, v9
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vslideup.vi v8, v10, 2
-; RV64-NEXT: lui a0, 12304
-; RV64-NEXT: addiw a0, a0, 512
-; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64-NEXT: vsext.vf2 v12, v10
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vrgatherei16.vv v10, v8, v12
-; RV64-NEXT: vmv.v.v v8, v10
-; RV64-NEXT: ret
+; ZVBB-LABEL: vector_interleave_v4f64_v2f64:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vmv1r.v v10, v9
+; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVBB-NEXT: vslideup.vi v8, v10, 2
+; ZVBB-NEXT: lui a0, 12304
+; ZVBB-NEXT: addi a0, a0, 512
+; ZVBB-NEXT: vmv.s.x v10, a0
+; ZVBB-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsext.vf2 v12, v10
+; ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; ZVBB-NEXT: vrgatherei16.vv v10, v8, v12
+; ZVBB-NEXT: vmv.v.v v8, v10
+; ZVBB-NEXT: ret
%res = call <4 x double> @llvm.experimental.vector.interleave2.v4f64(<2 x double> %a, <2 x double> %b)
ret <4 x double> %res
}
@@ -277,3 +277,6 @@ declare <4 x float> @llvm.experimental.vector.interleave2.v4f32(<2 x float>, <2
declare <16 x half> @llvm.experimental.vector.interleave2.v16f16(<8 x half>, <8 x half>)
declare <8 x float> @llvm.experimental.vector.interleave2.v8f32(<4 x float>, <4 x float>)
declare <4 x double> @llvm.experimental.vector.interleave2.v4f64(<2 x double>, <2 x double>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
index d407cf43a4fc1be..58874fe8c8fca7a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
@@ -392,31 +392,18 @@ define <vscale x 1 x i16> @vrem_vx_nxv1i16(<vscale x 1 x i16> %va, i16 signext %
}
define <vscale x 1 x i16> @vrem_vi_nxv1i16_0(<vscale x 1 x i16> %va) {
-; RV32-LABEL: vrem_vi_nxv1i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 1048571
-; RV32-NEXT: addi a0, a0, 1755
-; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; RV32-NEXT: vmulh.vx v9, v8, a0
-; RV32-NEXT: vsra.vi v9, v9, 1
-; RV32-NEXT: vsrl.vi v10, v9, 15
-; RV32-NEXT: vadd.vv v9, v9, v10
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vrem_vi_nxv1i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 1048571
-; RV64-NEXT: addiw a0, a0, 1755
-; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; RV64-NEXT: vmulh.vx v9, v8, a0
-; RV64-NEXT: vsra.vi v9, v9, 1
-; RV64-NEXT: vsrl.vi v10, v9, 15
-; RV64-NEXT: vadd.vv v9, v9, v10
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vrem_vi_nxv1i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 1048571
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmulh.vx v9, v8, a0
+; CHECK-NEXT: vsra.vi v9, v9, 1
+; CHECK-NEXT: vsrl.vi v10, v9, 15
+; CHECK-NEXT: vadd.vv v9, v9, v10
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v9
+; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
%vc = srem <vscale x 1 x i16> %va, %splat
@@ -459,31 +446,18 @@ define <vscale x 2 x i16> @vrem_vx_nxv2i16(<vscale x 2 x i16> %va, i16 signext %
}
define <vscale x 2 x i16> @vrem_vi_nxv2i16_0(<vscale x 2 x i16> %va) {
-; RV32-LABEL: vrem_vi_nxv2i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 1048571
-; RV32-NEXT: addi a0, a0, 1755
-; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; RV32-NEXT: vmulh.vx v9, v8, a0
-; RV32-NEXT: vsra.vi v9, v9, 1
-; RV32-NEXT: vsrl.vi v10, v9, 15
-; RV32-NEXT: vadd.vv v9, v9, v10
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vrem_vi_nxv2i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 1048571
-; RV64-NEXT: addiw a0, a0, 1755
-; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; RV64-NEXT: vmulh.vx v9, v8, a0
-; RV64-NEXT: vsra.vi v9, v9, 1
-; RV64-NEXT: vsrl.vi v10, v9, 15
-; RV64-NEXT: vadd.vv v9, v9, v10
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vrem_vi_nxv2i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 1048571
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmulh.vx v9, v8, a0
+; CHECK-NEXT: vsra.vi v9, v9, 1
+; CHECK-NEXT: vsrl.vi v10, v9, 15
+; CHECK-NEXT: vadd.vv v9, v9, v10
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v9
+; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
%vc = srem <vscale x 2 x i16> %va, %splat
@@ -526,31 +500,18 @@ define <vscale x 4 x i16> @vrem_vx_nxv4i16(<vscale x 4 x i16> %va, i16 signext %
}
define <vscale x 4 x i16> @vrem_vi_nxv4i16_0(<vscale x 4 x i16> %va) {
-; RV32-LABEL: vrem_vi_nxv4i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 1048571
-; RV32-NEXT: addi a0, a0, 1755
-; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; RV32-NEXT: vmulh.vx v9, v8, a0
-; RV32-NEXT: vsra.vi v9, v9, 1
-; RV32-NEXT: vsrl.vi v10, v9, 15
-; RV32-NEXT: vadd.vv v9, v9, v10
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vrem_vi_nxv4i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 1048571
-; RV64-NEXT: addiw a0, a0, 1755
-; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; RV64-NEXT: vmulh.vx v9, v8, a0
-; RV64-NEXT: vsra.vi v9, v9, 1
-; RV64-NEXT: vsrl.vi v10, v9, 15
-; RV64-NEXT: vadd.vv v9, v9, v10
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vrem_vi_nxv4i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 1048571
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmulh.vx v9, v8, a0
+; CHECK-NEXT: vsra.vi v9, v9, 1
+; CHECK-NEXT: vsrl.vi v10, v9, 15
+; CHECK-NEXT: vadd.vv v9, v9, v10
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v9
+; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
%vc = srem <vscale x 4 x i16> %va, %splat
@@ -593,31 +554,18 @@ define <vscale x 8 x i16> @vrem_vx_nxv8i16(<vscale x 8 x i16> %va, i16 signext %
}
define <vscale x 8 x i16> @vrem_vi_nxv8i16_0(<vscale x 8 x i16> %va) {
-; RV32-LABEL: vrem_vi_nxv8i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 1048571
-; RV32-NEXT: addi a0, a0, 1755
-; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV32-NEXT: vmulh.vx v10, v8, a0
-; RV32-NEXT: vsra.vi v10, v10, 1
-; RV32-NEXT: vsrl.vi v12, v10, 15
-; RV32-NEXT: vadd.vv v10, v10, v12
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vrem_vi_nxv8i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 1048571
-; RV64-NEXT: addiw a0, a0, 1755
-; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV64-NEXT: vmulh.vx v10, v8, a0
-; RV64-NEXT: vsra.vi v10, v10, 1
-; RV64-NEXT: vsrl.vi v12, v10, 15
-; RV64-NEXT: vadd.vv v10, v10, v12
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: vrem_vi_nxv8i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 1048571
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmulh.vx v10, v8, a0
+; CHECK-NEXT: vsra.vi v10, v10, 1
+; CHECK-NEXT: vsrl.vi v12, v10, 15
+; CHECK-NEXT: vadd.vv v10, v10, v12
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v10
+; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
%vc = srem <vscale x 8 x i16> %va, %splat
@@ -660,31 +608,18 @@ define <vscale x 16 x i16> @vrem_vx_nxv16i16(<vscale x 16 x i16> %va, i16 signex
}
define <vscale x 16 x i16> @vrem_vi_nxv16i16_0(<vscale x 16 x i16> %va) {
-; RV32-LABEL: vrem_vi_nxv16i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 1048571
-; RV32-NEXT: addi a0, a0, 1755
-; RV32-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; RV32-NEXT: vmulh.vx v12, v8, a0
-; RV32-NEXT: vsra.vi v12, v12, 1
-; RV32-NEXT: vsrl.vi v16, v12, 15
-; RV32-NEXT: vadd.vv v12, v12, v16
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v12
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vrem_vi_nxv16i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 1048571
-; RV64-NEXT: addiw a0, a0, 1755
-; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; RV64-NEXT: vmulh.vx v12, v8, a0
-; RV64-NEXT: vsra.vi v12, v12, 1
-; RV64-NEXT: vsrl.vi v16, v12, 15
-; RV64-NEXT: vadd.vv v12, v12, v16
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v12
-; RV64-NEXT: ret
+; CHECK-LABEL: vrem_vi_nxv16i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 1048571
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmulh.vx v12, v8, a0
+; CHECK-NEXT: vsra.vi v12, v12, 1
+; CHECK-NEXT: vsrl.vi v16, v12, 15
+; CHECK-NEXT: vadd.vv v12, v12, v16
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v12
+; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
%vc = srem <vscale x 16 x i16> %va, %splat
@@ -727,31 +662,18 @@ define <vscale x 32 x i16> @vrem_vx_nxv32i16(<vscale x 32 x i16> %va, i16 signex
}
define <vscale x 32 x i16> @vrem_vi_nxv32i16_0(<vscale x 32 x i16> %va) {
-; RV32-LABEL: vrem_vi_nxv32i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 1048571
-; RV32-NEXT: addi a0, a0, 1755
-; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; RV32-NEXT: vmulh.vx v16, v8, a0
-; RV32-NEXT: vsra.vi v16, v16, 1
-; RV32-NEXT: vsrl.vi v24, v16, 15
-; RV32-NEXT: vadd.vv v16, v16, v24
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vrem_vi_nxv32i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 1048571
-; RV64-NEXT: addiw a0, a0, 1755
-; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; RV64-NEXT: vmulh.vx v16, v8, a0
-; RV64-NEXT: vsra.vi v16, v16, 1
-; RV64-NEXT: vsrl.vi v24, v16, 15
-; RV64-NEXT: vadd.vv v16, v16, v24
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v16
-; RV64-NEXT: ret
+; CHECK-LABEL: vrem_vi_nxv32i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 1048571
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmulh.vx v16, v8, a0
+; CHECK-NEXT: vsra.vi v16, v16, 1
+; CHECK-NEXT: vsrl.vi v24, v16, 15
+; CHECK-NEXT: vadd.vv v16, v16, v24
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v16
+; CHECK-NEXT: ret
%head = insertelement <vscale x 32 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
%vc = srem <vscale x 32 x i16> %va, %splat
@@ -798,7 +720,7 @@ define <vscale x 1 x i32> @vrem_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
; RV64-LABEL: vrem_vi_nxv1i32_0:
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addiw a0, a0, -1171
+; RV64-NEXT: addi a0, a0, -1171
; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
; RV64-NEXT: vmulh.vx v9, v8, a0
; RV64-NEXT: vsub.vv v9, v9, v8
@@ -854,7 +776,7 @@ define <vscale x 2 x i32> @vrem_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
; RV64-LABEL: vrem_vi_nxv2i32_0:
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addiw a0, a0, -1171
+; RV64-NEXT: addi a0, a0, -1171
; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV64-NEXT: vmulh.vx v9, v8, a0
; RV64-NEXT: vsub.vv v9, v9, v8
@@ -910,7 +832,7 @@ define <vscale x 4 x i32> @vrem_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
; RV64-LABEL: vrem_vi_nxv4i32_0:
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addiw a0, a0, -1171
+; RV64-NEXT: addi a0, a0, -1171
; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
; RV64-NEXT: vmulh.vx v10, v8, a0
; RV64-NEXT: vsub.vv v10, v10, v8
@@ -966,7 +888,7 @@ define <vscale x 8 x i32> @vrem_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
; RV64-LABEL: vrem_vi_nxv8i32_0:
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addiw a0, a0, -1171
+; RV64-NEXT: addi a0, a0, -1171
; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV64-NEXT: vmulh.vx v12, v8, a0
; RV64-NEXT: vsub.vv v12, v12, v8
@@ -1022,7 +944,7 @@ define <vscale x 16 x i32> @vrem_vi_nxv16i32_0(<vscale x 16 x i32> %va) {
; RV64-LABEL: vrem_vi_nxv16i32_0:
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 449390
-; RV64-NEXT: addiw a0, a0, -1171
+; RV64-NEXT: addi a0, a0, -1171
; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV64-NEXT: vmulh.vx v16, v8, a0
; RV64-NEXT: vsub.vv v16, v16, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
index 4f85acb0bd5ee18..428d071cac399ef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
@@ -293,27 +293,16 @@ define <vscale x 1 x i16> @vremu_vx_nxv1i16(<vscale x 1 x i16> %va, i16 signext
}
define <vscale x 1 x i16> @vremu_vi_nxv1i16_0(<vscale x 1 x i16> %va) {
-; RV32-LABEL: vremu_vi_nxv1i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 2
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; RV32-NEXT: vmulhu.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v9, v9, 13
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vremu_vi_nxv1i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 2
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; RV64-NEXT: vmulhu.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v9, v9, 13
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vremu_vi_nxv1i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 2
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmulhu.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v9, v9, 13
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v9
+; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
%vc = urem <vscale x 1 x i16> %va, %splat
@@ -343,27 +332,16 @@ define <vscale x 2 x i16> @vremu_vx_nxv2i16(<vscale x 2 x i16> %va, i16 signext
}
define <vscale x 2 x i16> @vremu_vi_nxv2i16_0(<vscale x 2 x i16> %va) {
-; RV32-LABEL: vremu_vi_nxv2i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 2
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; RV32-NEXT: vmulhu.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v9, v9, 13
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vremu_vi_nxv2i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 2
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; RV64-NEXT: vmulhu.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v9, v9, 13
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vremu_vi_nxv2i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 2
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmulhu.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v9, v9, 13
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v9
+; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
%vc = urem <vscale x 2 x i16> %va, %splat
@@ -393,27 +371,16 @@ define <vscale x 4 x i16> @vremu_vx_nxv4i16(<vscale x 4 x i16> %va, i16 signext
}
define <vscale x 4 x i16> @vremu_vi_nxv4i16_0(<vscale x 4 x i16> %va) {
-; RV32-LABEL: vremu_vi_nxv4i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 2
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; RV32-NEXT: vmulhu.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v9, v9, 13
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vremu_vi_nxv4i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 2
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; RV64-NEXT: vmulhu.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v9, v9, 13
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vremu_vi_nxv4i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 2
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmulhu.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v9, v9, 13
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v9
+; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
%vc = urem <vscale x 4 x i16> %va, %splat
@@ -443,27 +410,16 @@ define <vscale x 8 x i16> @vremu_vx_nxv8i16(<vscale x 8 x i16> %va, i16 signext
}
define <vscale x 8 x i16> @vremu_vi_nxv8i16_0(<vscale x 8 x i16> %va) {
-; RV32-LABEL: vremu_vi_nxv8i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 2
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV32-NEXT: vmulhu.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v10, v10, 13
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vremu_vi_nxv8i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 2
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV64-NEXT: vmulhu.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v10, v10, 13
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: vremu_vi_nxv8i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 2
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmulhu.vx v10, v8, a0
+; CHECK-NEXT: vsrl.vi v10, v10, 13
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v10
+; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
%vc = urem <vscale x 8 x i16> %va, %splat
@@ -493,27 +449,16 @@ define <vscale x 16 x i16> @vremu_vx_nxv16i16(<vscale x 16 x i16> %va, i16 signe
}
define <vscale x 16 x i16> @vremu_vi_nxv16i16_0(<vscale x 16 x i16> %va) {
-; RV32-LABEL: vremu_vi_nxv16i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 2
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; RV32-NEXT: vmulhu.vx v12, v8, a0
-; RV32-NEXT: vsrl.vi v12, v12, 13
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v12
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vremu_vi_nxv16i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 2
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; RV64-NEXT: vmulhu.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v12, v12, 13
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v12
-; RV64-NEXT: ret
+; CHECK-LABEL: vremu_vi_nxv16i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 2
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmulhu.vx v12, v8, a0
+; CHECK-NEXT: vsrl.vi v12, v12, 13
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v12
+; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
%vc = urem <vscale x 16 x i16> %va, %splat
@@ -543,27 +488,16 @@ define <vscale x 32 x i16> @vremu_vx_nxv32i16(<vscale x 32 x i16> %va, i16 signe
}
define <vscale x 32 x i16> @vremu_vi_nxv32i16_0(<vscale x 32 x i16> %va) {
-; RV32-LABEL: vremu_vi_nxv32i16_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 2
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; RV32-NEXT: vmulhu.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v16, v16, 13
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vremu_vi_nxv32i16_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 2
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; RV64-NEXT: vmulhu.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v16, v16, 13
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v16
-; RV64-NEXT: ret
+; CHECK-LABEL: vremu_vi_nxv32i16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 2
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmulhu.vx v16, v8, a0
+; CHECK-NEXT: vsrl.vi v16, v16, 13
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v16
+; CHECK-NEXT: ret
%head = insertelement <vscale x 32 x i16> poison, i16 -7, i32 0
%splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
%vc = urem <vscale x 32 x i16> %va, %splat
@@ -593,27 +527,16 @@ define <vscale x 1 x i32> @vremu_vx_nxv1i32(<vscale x 1 x i32> %va, i32 signext
}
define <vscale x 1 x i32> @vremu_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
-; RV32-LABEL: vremu_vi_nxv1i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 131072
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV32-NEXT: vmulhu.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v9, v9, 29
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vremu_vi_nxv1i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 131072
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT: vmulhu.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v9, v9, 29
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vremu_vi_nxv1i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 131072
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmulhu.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v9, v9, 29
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v9
+; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i32> poison, i32 -7, i32 0
%splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
%vc = urem <vscale x 1 x i32> %va, %splat
@@ -643,27 +566,16 @@ define <vscale x 2 x i32> @vremu_vx_nxv2i32(<vscale x 2 x i32> %va, i32 signext
}
define <vscale x 2 x i32> @vremu_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
-; RV32-LABEL: vremu_vi_nxv2i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 131072
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT: vmulhu.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v9, v9, 29
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vremu_vi_nxv2i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 131072
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT: vmulhu.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v9, v9, 29
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vremu_vi_nxv2i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 131072
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmulhu.vx v9, v8, a0
+; CHECK-NEXT: vsrl.vi v9, v9, 29
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v9
+; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i32> poison, i32 -7, i32 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
%vc = urem <vscale x 2 x i32> %va, %splat
@@ -693,27 +605,16 @@ define <vscale x 4 x i32> @vremu_vx_nxv4i32(<vscale x 4 x i32> %va, i32 signext
}
define <vscale x 4 x i32> @vremu_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
-; RV32-LABEL: vremu_vi_nxv4i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 131072
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT: vmulhu.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v10, v10, 29
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vremu_vi_nxv4i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 131072
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT: vmulhu.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v10, v10, 29
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: vremu_vi_nxv4i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 131072
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmulhu.vx v10, v8, a0
+; CHECK-NEXT: vsrl.vi v10, v10, 29
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v10
+; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i32> poison, i32 -7, i32 0
%splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
%vc = urem <vscale x 4 x i32> %va, %splat
@@ -743,27 +644,16 @@ define <vscale x 8 x i32> @vremu_vx_nxv8i32(<vscale x 8 x i32> %va, i32 signext
}
define <vscale x 8 x i32> @vremu_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
-; RV32-LABEL: vremu_vi_nxv8i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 131072
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vmulhu.vx v12, v8, a0
-; RV32-NEXT: vsrl.vi v12, v12, 29
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v12
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vremu_vi_nxv8i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 131072
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT: vmulhu.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v12, v12, 29
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v12
-; RV64-NEXT: ret
+; CHECK-LABEL: vremu_vi_nxv8i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 131072
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmulhu.vx v12, v8, a0
+; CHECK-NEXT: vsrl.vi v12, v12, 29
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v12
+; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i32> poison, i32 -7, i32 0
%splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
%vc = urem <vscale x 8 x i32> %va, %splat
@@ -793,27 +683,16 @@ define <vscale x 16 x i32> @vremu_vx_nxv16i32(<vscale x 16 x i32> %va, i32 signe
}
define <vscale x 16 x i32> @vremu_vi_nxv16i32_0(<vscale x 16 x i32> %va) {
-; RV32-LABEL: vremu_vi_nxv16i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 131072
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT: vmulhu.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v16, v16, 29
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vnmsac.vx v8, a0, v16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vremu_vi_nxv16i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 131072
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV64-NEXT: vmulhu.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v16, v16, 29
-; RV64-NEXT: li a0, -7
-; RV64-NEXT: vnmsac.vx v8, a0, v16
-; RV64-NEXT: ret
+; CHECK-LABEL: vremu_vi_nxv16i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 131072
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmulhu.vx v16, v8, a0
+; CHECK-NEXT: vsrl.vi v16, v16, 29
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vnmsac.vx v8, a0, v16
+; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i32> poison, i32 -7, i32 0
%splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
%vc = urem <vscale x 16 x i32> %va, %splat
diff --git a/llvm/test/CodeGen/RISCV/select-binop-identity.ll b/llvm/test/CodeGen/RISCV/select-binop-identity.ll
index ebf47cdfd2a1d3e..61344bc8979abb7 100644
--- a/llvm/test/CodeGen/RISCV/select-binop-identity.ll
+++ b/llvm/test/CodeGen/RISCV/select-binop-identity.ll
@@ -266,7 +266,7 @@ define signext i32 @add_select_all_zeros_i32(i1 zeroext %c, i32 signext %x, i32
;
; RV64I-LABEL: add_select_all_zeros_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: addw a0, a2, a0
; RV64I-NEXT: ret
@@ -366,7 +366,7 @@ define signext i32 @sub_select_all_zeros_i32(i1 zeroext %c, i32 signext %x, i32
;
; RV64I-LABEL: sub_select_all_zeros_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: subw a0, a2, a0
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/select-constant-xor.ll b/llvm/test/CodeGen/RISCV/select-constant-xor.ll
index 2e26ae78e2dd8e4..670bdb26cfeb904 100644
--- a/llvm/test/CodeGen/RISCV/select-constant-xor.ll
+++ b/llvm/test/CodeGen/RISCV/select-constant-xor.ll
@@ -225,7 +225,7 @@ define i32 @oneusecmp(i32 %a, i32 %b, i32 %d) {
; RV64-LABEL: oneusecmp:
; RV64: # %bb.0:
; RV64-NEXT: sext.w a3, a0
-; RV64-NEXT: sraiw a0, a0, 31
+; RV64-NEXT: srai a0, a0, 31
; RV64-NEXT: xori a0, a0, 127
; RV64-NEXT: bltz a3, .LBB10_2
; RV64-NEXT: # %bb.1:
diff --git a/llvm/test/CodeGen/RISCV/select-to-and-zext.ll b/llvm/test/CodeGen/RISCV/select-to-and-zext.ll
index eacc26c18415da6..2f03ff969205f6a 100644
--- a/llvm/test/CodeGen/RISCV/select-to-and-zext.ll
+++ b/llvm/test/CodeGen/RISCV/select-to-and-zext.ll
@@ -42,7 +42,7 @@ define i32 @from_cmpeq_fail_bad_andmask(i32 %xx, i32 %y) {
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: addi a0, a0, -9
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a1, a0
; RV64I-NEXT: andi a0, a0, 3
; RV64I-NEXT: ret
@@ -142,7 +142,7 @@ define i32 @from_i1_fail_bad_select1(i1 %x, i32 %y) {
;
; RV64I-LABEL: from_i1_fail_bad_select1:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a1, a0
; RV64I-NEXT: andi a0, a0, 1
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/select.ll b/llvm/test/CodeGen/RISCV/select.ll
index e07f1d6f594350d..d4a6e9e9dbb4678 100644
--- a/llvm/test/CodeGen/RISCV/select.ll
+++ b/llvm/test/CodeGen/RISCV/select.ll
@@ -147,7 +147,7 @@ define i16 @select_xor_3(i16 %A, i8 %cond) {
; RV64IM-LABEL: select_xor_3:
; RV64IM: # %bb.0: # %entry
; RV64IM-NEXT: andi a1, a1, 1
-; RV64IM-NEXT: addiw a1, a1, -1
+; RV64IM-NEXT: addi a1, a1, -1
; RV64IM-NEXT: andi a1, a1, 43
; RV64IM-NEXT: xor a0, a0, a1
; RV64IM-NEXT: ret
@@ -189,7 +189,7 @@ define i16 @select_xor_3b(i16 %A, i8 %cond) {
; RV64IM-LABEL: select_xor_3b:
; RV64IM: # %bb.0: # %entry
; RV64IM-NEXT: andi a1, a1, 1
-; RV64IM-NEXT: addiw a1, a1, -1
+; RV64IM-NEXT: addi a1, a1, -1
; RV64IM-NEXT: andi a1, a1, 43
; RV64IM-NEXT: xor a0, a0, a1
; RV64IM-NEXT: ret
@@ -627,7 +627,7 @@ define i32 @select_add_2(i1 zeroext %cond, i32 %a, i32 %b) {
;
; RV64IM-LABEL: select_add_2:
; RV64IM: # %bb.0: # %entry
-; RV64IM-NEXT: addiw a0, a0, -1
+; RV64IM-NEXT: addi a0, a0, -1
; RV64IM-NEXT: and a0, a0, a2
; RV64IM-NEXT: addw a0, a1, a0
; RV64IM-NEXT: ret
@@ -665,7 +665,7 @@ define i32 @select_add_3(i1 zeroext %cond, i32 %a) {
;
; RV64IM-LABEL: select_add_3:
; RV64IM: # %bb.0: # %entry
-; RV64IM-NEXT: addiw a0, a0, -1
+; RV64IM-NEXT: addi a0, a0, -1
; RV64IM-NEXT: andi a0, a0, 42
; RV64IM-NEXT: addw a0, a1, a0
; RV64IM-NEXT: ret
@@ -754,7 +754,7 @@ define i32 @select_sub_2(i1 zeroext %cond, i32 %a, i32 %b) {
;
; RV64IM-LABEL: select_sub_2:
; RV64IM: # %bb.0: # %entry
-; RV64IM-NEXT: addiw a0, a0, -1
+; RV64IM-NEXT: addi a0, a0, -1
; RV64IM-NEXT: and a0, a0, a2
; RV64IM-NEXT: subw a0, a1, a0
; RV64IM-NEXT: ret
@@ -792,7 +792,7 @@ define i32 @select_sub_3(i1 zeroext %cond, i32 %a) {
;
; RV64IM-LABEL: select_sub_3:
; RV64IM: # %bb.0: # %entry
-; RV64IM-NEXT: addiw a0, a0, -1
+; RV64IM-NEXT: addi a0, a0, -1
; RV64IM-NEXT: andi a0, a0, 42
; RV64IM-NEXT: subw a0, a1, a0
; RV64IM-NEXT: ret
@@ -1159,7 +1159,7 @@ define i32 @select_shl_2(i1 zeroext %cond, i32 %a, i32 %b) {
;
; RV64IM-LABEL: select_shl_2:
; RV64IM: # %bb.0: # %entry
-; RV64IM-NEXT: addiw a0, a0, -1
+; RV64IM-NEXT: addi a0, a0, -1
; RV64IM-NEXT: and a0, a0, a2
; RV64IM-NEXT: sllw a0, a1, a0
; RV64IM-NEXT: ret
@@ -1256,7 +1256,7 @@ define i32 @select_ashr_2(i1 zeroext %cond, i32 %a, i32 %b) {
;
; RV64IM-LABEL: select_ashr_2:
; RV64IM: # %bb.0: # %entry
-; RV64IM-NEXT: addiw a0, a0, -1
+; RV64IM-NEXT: addi a0, a0, -1
; RV64IM-NEXT: and a0, a0, a2
; RV64IM-NEXT: sraw a0, a1, a0
; RV64IM-NEXT: ret
@@ -1353,7 +1353,7 @@ define i32 @select_lshr_2(i1 zeroext %cond, i32 %a, i32 %b) {
;
; RV64IM-LABEL: select_lshr_2:
; RV64IM: # %bb.0: # %entry
-; RV64IM-NEXT: addiw a0, a0, -1
+; RV64IM-NEXT: addi a0, a0, -1
; RV64IM-NEXT: and a0, a0, a2
; RV64IM-NEXT: srlw a0, a1, a0
; RV64IM-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll
index f9b72ccb26f3829..3e6893731dd0311 100644
--- a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll
+++ b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll
@@ -76,19 +76,12 @@ define i32 @not_pos_sel_same_variable(i32 signext %a) {
; Compare if positive and select of constants where one constant is zero.
define i32 @pos_sel_constants(i32 signext %a) {
-; RV32-LABEL: pos_sel_constants:
-; RV32: # %bb.0:
-; RV32-NEXT: slti a0, a0, 0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: andi a0, a0, 5
-; RV32-NEXT: ret
-;
-; RV64-LABEL: pos_sel_constants:
-; RV64: # %bb.0:
-; RV64-NEXT: slti a0, a0, 0
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: andi a0, a0, 5
-; RV64-NEXT: ret
+; CHECK-LABEL: pos_sel_constants:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slti a0, a0, 0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: andi a0, a0, 5
+; CHECK-NEXT: ret
%tmp.1 = icmp sgt i32 %a, -1
%retval = select i1 %tmp.1, i32 5, i32 0
ret i32 %retval
diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll
index f9ffb6603fa0e4b..3babef93499c859 100644
--- a/llvm/test/CodeGen/RISCV/sextw-removal.ll
+++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll
@@ -181,9 +181,9 @@ define void @test5(i32 signext %arg, i32 signext %arg1) nounwind {
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw s1, a1, 819
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw s2, a1, -241
+; RV64I-NEXT: addi s2, a1, -241
; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw s3, a1, 257
+; RV64I-NEXT: addi s3, a1, 257
; RV64I-NEXT: .LBB4_1: # %bb2
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    call bar@plt
@@ -1349,7 +1349,7 @@ define signext i32 @sextw_sh2add(i1 zeroext %0, ptr %1, i32 signext %2, i32 sign
; NOREMOVAL-LABEL: sextw_sh2add:
; NOREMOVAL: # %bb.0:
; NOREMOVAL-NEXT: sh2add a2, a2, a3
-; NOREMOVAL-NEXT: sext.w a2, a2
+; NOREMOVAL-NEXT: mv a2, a2
; NOREMOVAL-NEXT: beqz a0, .LBB22_2
; NOREMOVAL-NEXT: # %bb.1:
; NOREMOVAL-NEXT: sw a2, 0(a1)
diff --git a/llvm/test/CodeGen/RISCV/shl-demanded.ll b/llvm/test/CodeGen/RISCV/shl-demanded.ll
index 4e3c063eff2deab..b0e3ebcd505665c 100644
--- a/llvm/test/CodeGen/RISCV/shl-demanded.ll
+++ b/llvm/test/CodeGen/RISCV/shl-demanded.ll
@@ -239,7 +239,7 @@ define i32 @set_shl_mask(i32 %x, i32 %y) {
; RV64I-LABEL: set_shl_mask:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a2, 16
-; RV64I-NEXT: addiw a3, a2, 1
+; RV64I-NEXT: addi a3, a2, 1
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: and a0, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/shlimm-addimm.ll b/llvm/test/CodeGen/RISCV/shlimm-addimm.ll
index 4a4762a0db146ca..ead71bcbe113c9e 100644
--- a/llvm/test/CodeGen/RISCV/shlimm-addimm.ll
+++ b/llvm/test/CodeGen/RISCV/shlimm-addimm.ll
@@ -78,7 +78,7 @@ define i32 @shl5_add101024_a(i32 %x) {
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 5
; RV64I-NEXT: lui a1, 25
-; RV64I-NEXT: addiw a1, a1, -1376
+; RV64I-NEXT: addi a1, a1, -1376
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
%tmp0 = shl i32 %x, 5
@@ -99,7 +99,7 @@ define signext i32 @shl5_add101024_b(i32 signext %x) {
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 5
; RV64I-NEXT: lui a1, 25
-; RV64I-NEXT: addiw a1, a1, -1376
+; RV64I-NEXT: addi a1, a1, -1376
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
%tmp0 = shl i32 %x, 5
@@ -146,7 +146,7 @@ define i32 @shl5_add47968_a(i32 %x) {
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 5
; RV64I-NEXT: lui a1, 12
-; RV64I-NEXT: addiw a1, a1, -1184
+; RV64I-NEXT: addi a1, a1, -1184
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
%tmp0 = shl i32 %x, 5
@@ -167,7 +167,7 @@ define signext i32 @shl5_add47968_b(i32 signext %x) {
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 5
; RV64I-NEXT: lui a1, 12
-; RV64I-NEXT: addiw a1, a1, -1184
+; RV64I-NEXT: addi a1, a1, -1184
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
%tmp0 = shl i32 %x, 5
@@ -214,7 +214,7 @@ define i32 @shl5_add47969_a(i32 %x) {
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 5
; RV64I-NEXT: lui a1, 12
-; RV64I-NEXT: addiw a1, a1, -1183
+; RV64I-NEXT: addi a1, a1, -1183
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
%tmp0 = shl i32 %x, 5
@@ -235,7 +235,7 @@ define signext i32 @shl5_add47969_b(i32 signext %x) {
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 5
; RV64I-NEXT: lui a1, 12
-; RV64I-NEXT: addiw a1, a1, -1183
+; RV64I-NEXT: addi a1, a1, -1183
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
%tmp0 = shl i32 %x, 5
@@ -282,7 +282,7 @@ define i32 @shl5_sub47968_a(i32 %x) {
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 5
; RV64I-NEXT: lui a1, 1048564
-; RV64I-NEXT: addiw a1, a1, 1184
+; RV64I-NEXT: addi a1, a1, 1184
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
%tmp0 = shl i32 %x, 5
@@ -303,7 +303,7 @@ define signext i32 @shl5_sub47968_b(i32 signext %x) {
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 5
; RV64I-NEXT: lui a1, 1048564
-; RV64I-NEXT: addiw a1, a1, 1184
+; RV64I-NEXT: addi a1, a1, 1184
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
%tmp0 = shl i32 %x, 5
@@ -351,7 +351,7 @@ define i32 @shl5_sub47969_a(i32 %x) {
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 5
; RV64I-NEXT: lui a1, 1048564
-; RV64I-NEXT: addiw a1, a1, 1183
+; RV64I-NEXT: addi a1, a1, 1183
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
%tmp0 = shl i32 %x, 5
@@ -372,7 +372,7 @@ define signext i32 @shl5_sub47969_b(i32 signext %x) {
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 5
; RV64I-NEXT: lui a1, 1048564
-; RV64I-NEXT: addiw a1, a1, 1183
+; RV64I-NEXT: addi a1, a1, 1183
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
%tmp0 = shl i32 %x, 5
diff --git a/llvm/test/CodeGen/RISCV/signed-truncation-check.ll b/llvm/test/CodeGen/RISCV/signed-truncation-check.ll
index 4eea8643101b159..0860853ae9c0af9 100644
--- a/llvm/test/CodeGen/RISCV/signed-truncation-check.ll
+++ b/llvm/test/CodeGen/RISCV/signed-truncation-check.ll
@@ -265,7 +265,7 @@ define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
;
; RV64-LABEL: add_ugecmp_i16_i8:
; RV64: # %bb.0:
-; RV64-NEXT: addiw a0, a0, -128
+; RV64-NEXT: addi a0, a0, -128
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 56
; RV64-NEXT: sltiu a0, a0, 255
@@ -482,7 +482,7 @@ define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind {
;
; RV64-LABEL: add_ugtcmp_i16_i8:
; RV64: # %bb.0:
-; RV64-NEXT: addiw a0, a0, -128
+; RV64-NEXT: addi a0, a0, -128
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 56
; RV64-NEXT: sltiu a0, a0, 255
@@ -508,7 +508,7 @@ define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
;
; RV64I-LABEL: add_ultcmp_i16_i8:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 128
+; RV64I-NEXT: addi a0, a0, 128
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sltiu a0, a0, 256
@@ -523,7 +523,7 @@ define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
;
; RV64ZBB-LABEL: add_ultcmp_i16_i8:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, 128
+; RV64ZBB-NEXT: addi a0, a0, 128
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 256
; RV64ZBB-NEXT: ret
@@ -688,7 +688,7 @@ define i1 @add_ulecmp_i16_i8(i16 %x) nounwind {
;
; RV64I-LABEL: add_ulecmp_i16_i8:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 128
+; RV64I-NEXT: addi a0, a0, 128
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sltiu a0, a0, 256
@@ -703,7 +703,7 @@ define i1 @add_ulecmp_i16_i8(i16 %x) nounwind {
;
; RV64ZBB-LABEL: add_ulecmp_i16_i8:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, 128
+; RV64ZBB-NEXT: addi a0, a0, 128
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 256
; RV64ZBB-NEXT: ret
@@ -784,7 +784,7 @@ define i1 @add_ultcmp_bad_i16_i8_cmp(i16 %x, i16 %y) nounwind {
; RV64ZBB-LABEL: add_ultcmp_bad_i16_i8_cmp:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: zext.h a1, a1
-; RV64ZBB-NEXT: addiw a0, a0, 128
+; RV64ZBB-NEXT: addi a0, a0, 128
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltu a0, a0, a1
; RV64ZBB-NEXT: ret
@@ -805,7 +805,7 @@ define i1 @add_ultcmp_bad_i8_i16(i16 %x) nounwind {
;
; RV64I-LABEL: add_ultcmp_bad_i8_i16:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 128
+; RV64I-NEXT: addi a0, a0, 128
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sltiu a0, a0, 128
@@ -820,7 +820,7 @@ define i1 @add_ultcmp_bad_i8_i16(i16 %x) nounwind {
;
; RV64ZBB-LABEL: add_ultcmp_bad_i8_i16:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, 128
+; RV64ZBB-NEXT: addi a0, a0, 128
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 128
; RV64ZBB-NEXT: ret
@@ -841,7 +841,7 @@ define i1 @add_ultcmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
;
; RV64I-LABEL: add_ultcmp_bad_i16_i8_c0notpoweroftwo:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 192
+; RV64I-NEXT: addi a0, a0, 192
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sltiu a0, a0, 256
@@ -856,7 +856,7 @@ define i1 @add_ultcmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
;
; RV64ZBB-LABEL: add_ultcmp_bad_i16_i8_c0notpoweroftwo:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, 192
+; RV64ZBB-NEXT: addi a0, a0, 192
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 256
; RV64ZBB-NEXT: ret
@@ -877,7 +877,7 @@ define i1 @add_ultcmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind {
;
; RV64I-LABEL: add_ultcmp_bad_i16_i8_c1notpoweroftwo:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 128
+; RV64I-NEXT: addi a0, a0, 128
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sltiu a0, a0, 768
@@ -892,7 +892,7 @@ define i1 @add_ultcmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind {
;
; RV64ZBB-LABEL: add_ultcmp_bad_i16_i8_c1notpoweroftwo:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, 128
+; RV64ZBB-NEXT: addi a0, a0, 128
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 768
; RV64ZBB-NEXT: ret
@@ -913,7 +913,7 @@ define i1 @add_ultcmp_bad_i16_i8_magic(i16 %x) nounwind {
;
; RV64I-LABEL: add_ultcmp_bad_i16_i8_magic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 64
+; RV64I-NEXT: addi a0, a0, 64
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sltiu a0, a0, 256
@@ -928,7 +928,7 @@ define i1 @add_ultcmp_bad_i16_i8_magic(i16 %x) nounwind {
;
; RV64ZBB-LABEL: add_ultcmp_bad_i16_i8_magic:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, 64
+; RV64ZBB-NEXT: addi a0, a0, 64
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 256
; RV64ZBB-NEXT: ret
@@ -949,7 +949,7 @@ define i1 @add_ultcmp_bad_i16_i4(i16 %x) nounwind {
;
; RV64I-LABEL: add_ultcmp_bad_i16_i4:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 8
+; RV64I-NEXT: addi a0, a0, 8
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sltiu a0, a0, 16
@@ -964,7 +964,7 @@ define i1 @add_ultcmp_bad_i16_i4(i16 %x) nounwind {
;
; RV64ZBB-LABEL: add_ultcmp_bad_i16_i4:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, 8
+; RV64ZBB-NEXT: addi a0, a0, 8
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 16
; RV64ZBB-NEXT: ret
@@ -985,7 +985,7 @@ define i1 @add_ultcmp_bad_i24_i8(i24 %x) nounwind {
;
; RV64-LABEL: add_ultcmp_bad_i24_i8:
; RV64: # %bb.0:
-; RV64-NEXT: addiw a0, a0, 128
+; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: slli a0, a0, 40
; RV64-NEXT: srli a0, a0, 40
; RV64-NEXT: sltiu a0, a0, 256
diff --git a/llvm/test/CodeGen/RISCV/srem-lkk.ll b/llvm/test/CodeGen/RISCV/srem-lkk.ll
index 24e740fd143d131..0dc887e6b30d14b 100644
--- a/llvm/test/CodeGen/RISCV/srem-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-lkk.ll
@@ -47,8 +47,8 @@ define i32 @fold_srem_positive_odd(i32 %x) nounwind {
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 32
; RV64IM-NEXT: add a1, a1, a0
-; RV64IM-NEXT: srliw a2, a1, 31
-; RV64IM-NEXT: sraiw a1, a1, 6
+; RV64IM-NEXT: srli a2, a1, 31
+; RV64IM-NEXT: srai a1, a1, 6
; RV64IM-NEXT: add a1, a1, a2
; RV64IM-NEXT: li a2, 95
; RV64IM-NEXT: mul a1, a1, a2
@@ -198,7 +198,7 @@ define i32 @fold_srem_negative_even(i32 %x) nounwind {
; RV64IM-NEXT: srai a1, a1, 40
; RV64IM-NEXT: add a1, a1, a2
; RV64IM-NEXT: lui a2, 1048570
-; RV64IM-NEXT: addiw a2, a2, 1595
+; RV64IM-NEXT: addi a2, a2, 1595
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: subw a0, a0, a1
; RV64IM-NEXT: ret
@@ -273,8 +273,8 @@ define i32 @combine_srem_sdiv(i32 %x) nounwind {
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 32
; RV64IM-NEXT: add a1, a1, a0
-; RV64IM-NEXT: srliw a2, a1, 31
-; RV64IM-NEXT: sraiw a1, a1, 6
+; RV64IM-NEXT: srli a2, a1, 31
+; RV64IM-NEXT: srai a1, a1, 6
; RV64IM-NEXT: add a1, a1, a2
; RV64IM-NEXT: li a2, 95
; RV64IM-NEXT: mul a2, a1, a2
@@ -309,8 +309,8 @@ define i32 @dont_fold_srem_power_of_two(i32 %x) nounwind {
;
; RV64I-LABEL: dont_fold_srem_power_of_two:
; RV64I: # %bb.0:
-; RV64I-NEXT: sraiw a1, a0, 31
-; RV64I-NEXT: srliw a1, a1, 26
+; RV64I-NEXT: srai a1, a0, 31
+; RV64I-NEXT: srli a1, a1, 26
; RV64I-NEXT: add a1, a0, a1
; RV64I-NEXT: andi a1, a1, -64
; RV64I-NEXT: subw a0, a0, a1
@@ -318,8 +318,8 @@ define i32 @dont_fold_srem_power_of_two(i32 %x) nounwind {
;
; RV64IM-LABEL: dont_fold_srem_power_of_two:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: sraiw a1, a0, 31
-; RV64IM-NEXT: srliw a1, a1, 26
+; RV64IM-NEXT: srai a1, a0, 31
+; RV64IM-NEXT: srli a1, a1, 26
; RV64IM-NEXT: add a1, a0, a1
; RV64IM-NEXT: andi a1, a1, -64
; RV64IM-NEXT: subw a0, a0, a1
@@ -362,8 +362,8 @@ define i32 @dont_fold_srem_i32_smax(i32 %x) nounwind {
;
; RV64I-LABEL: dont_fold_srem_i32_smax:
; RV64I: # %bb.0:
-; RV64I-NEXT: sraiw a1, a0, 31
-; RV64I-NEXT: srliw a1, a1, 1
+; RV64I-NEXT: srai a1, a0, 31
+; RV64I-NEXT: srli a1, a1, 1
; RV64I-NEXT: add a1, a0, a1
; RV64I-NEXT: lui a2, 524288
; RV64I-NEXT: and a1, a1, a2
@@ -372,8 +372,8 @@ define i32 @dont_fold_srem_i32_smax(i32 %x) nounwind {
;
; RV64IM-LABEL: dont_fold_srem_i32_smax:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: sraiw a1, a0, 31
-; RV64IM-NEXT: srliw a1, a1, 1
+; RV64IM-NEXT: srai a1, a0, 31
+; RV64IM-NEXT: srli a1, a1, 1
; RV64IM-NEXT: add a1, a0, a1
; RV64IM-NEXT: lui a2, 524288
; RV64IM-NEXT: and a1, a1, a2
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index dcf701be76f62a9..6ed352b51f25459 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -34,7 +34,7 @@ define i1 @test_srem_odd(i29 %X) nounwind {
; RV64-NEXT: addiw a1, a1, 331
; RV64-NEXT:    call __muldi3@plt
; RV64-NEXT: lui a1, 662
-; RV64-NEXT: addiw a1, a1, -83
+; RV64-NEXT: addi a1, a1, -83
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: slli a0, a0, 35
; RV64-NEXT: srli a0, a0, 35
@@ -63,10 +63,10 @@ define i1 @test_srem_odd(i29 %X) nounwind {
; RV64M-LABEL: test_srem_odd:
; RV64M: # %bb.0:
; RV64M-NEXT: lui a1, 128424
-; RV64M-NEXT: addiw a1, a1, 331
+; RV64M-NEXT: addi a1, a1, 331
; RV64M-NEXT: mul a0, a0, a1
; RV64M-NEXT: lui a1, 662
-; RV64M-NEXT: addiw a1, a1, -83
+; RV64M-NEXT: addi a1, a1, -83
; RV64M-NEXT: add a0, a0, a1
; RV64M-NEXT: slli a0, a0, 35
; RV64M-NEXT: srli a0, a0, 35
@@ -93,10 +93,10 @@ define i1 @test_srem_odd(i29 %X) nounwind {
; RV64MV-LABEL: test_srem_odd:
; RV64MV: # %bb.0:
; RV64MV-NEXT: lui a1, 128424
-; RV64MV-NEXT: addiw a1, a1, 331
+; RV64MV-NEXT: addi a1, a1, 331
; RV64MV-NEXT: mul a0, a0, a1
; RV64MV-NEXT: lui a1, 662
-; RV64MV-NEXT: addiw a1, a1, -83
+; RV64MV-NEXT: addi a1, a1, -83
; RV64MV-NEXT: add a0, a0, a1
; RV64MV-NEXT: slli a0, a0, 35
; RV64MV-NEXT: srli a0, a0, 35
diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
index b5f1efa4b160ba9..2e0c541311e10b2 100644
--- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
@@ -909,7 +909,7 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
; RV64IM-NEXT: srli a3, a3, 11
; RV64IM-NEXT: add a3, a3, a5
; RV64IM-NEXT: lui a5, 1
-; RV64IM-NEXT: addiw a5, a5, 1327
+; RV64IM-NEXT: addi a5, a5, 1327
; RV64IM-NEXT: mul a3, a3, a5
; RV64IM-NEXT: subw a4, a4, a3
; RV64IM-NEXT: sh zero, 0(a0)
@@ -1053,7 +1053,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
; RV64IM-NEXT: srli a3, a3, 11
; RV64IM-NEXT: add a3, a3, a5
; RV64IM-NEXT: lui a5, 1
-; RV64IM-NEXT: addiw a5, a5, 1327
+; RV64IM-NEXT: addi a5, a5, 1327
; RV64IM-NEXT: mul a3, a3, a5
; RV64IM-NEXT: subw a4, a4, a3
; RV64IM-NEXT: srli a3, a1, 49
diff --git a/llvm/test/CodeGen/RISCV/urem-lkk.ll b/llvm/test/CodeGen/RISCV/urem-lkk.ll
index 3d181c3a30d0947..1b2cc1398ec11cf 100644
--- a/llvm/test/CodeGen/RISCV/urem-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-lkk.ll
@@ -44,7 +44,7 @@ define i32 @fold_urem_positive_odd(i32 %x) nounwind {
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 32
; RV64IM-NEXT: lui a2, 364242
-; RV64IM-NEXT: addiw a2, a2, 777
+; RV64IM-NEXT: addi a2, a2, 777
; RV64IM-NEXT: slli a2, a2, 32
; RV64IM-NEXT: mulhu a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 32
@@ -94,7 +94,7 @@ define i32 @fold_urem_positive_even(i32 %x) nounwind {
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 32
; RV64IM-NEXT: lui a2, 1012964
-; RV64IM-NEXT: addiw a2, a2, -61
+; RV64IM-NEXT: addi a2, a2, -61
; RV64IM-NEXT: slli a2, a2, 32
; RV64IM-NEXT: mulhu a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 42
@@ -170,7 +170,7 @@ define i32 @combine_urem_udiv(i32 %x) nounwind {
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 32
; RV64IM-NEXT: lui a2, 364242
-; RV64IM-NEXT: addiw a2, a2, 777
+; RV64IM-NEXT: addi a2, a2, 777
; RV64IM-NEXT: slli a2, a2, 32
; RV64IM-NEXT: mulhu a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 32
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index 456d98fd4e47ffe..df30946218dfad8 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -48,7 +48,7 @@ define i1 @test_urem_odd(i13 %X) nounwind {
; RV64M-LABEL: test_urem_odd:
; RV64M: # %bb.0:
; RV64M-NEXT: lui a1, 1
-; RV64M-NEXT: addiw a1, a1, -819
+; RV64M-NEXT: addi a1, a1, -819
; RV64M-NEXT: mul a0, a0, a1
; RV64M-NEXT: slli a0, a0, 51
; RV64M-NEXT: srli a0, a0, 51
@@ -68,7 +68,7 @@ define i1 @test_urem_odd(i13 %X) nounwind {
; RV64MV-LABEL: test_urem_odd:
; RV64MV: # %bb.0:
; RV64MV-NEXT: lui a1, 1
-; RV64MV-NEXT: addiw a1, a1, -819
+; RV64MV-NEXT: addi a1, a1, -819
; RV64MV-NEXT: mul a0, a0, a1
; RV64MV-NEXT: slli a0, a0, 51
; RV64MV-NEXT: srli a0, a0, 51
@@ -139,7 +139,7 @@ define i1 @test_urem_even(i27 %X) nounwind {
; RV64M-LABEL: test_urem_even:
; RV64M: # %bb.0:
; RV64M-NEXT: lui a1, 28087
-; RV64M-NEXT: addiw a1, a1, -585
+; RV64M-NEXT: addi a1, a1, -585
; RV64M-NEXT: mul a0, a0, a1
; RV64M-NEXT: slli a1, a0, 26
; RV64M-NEXT: slli a0, a0, 37
@@ -171,7 +171,7 @@ define i1 @test_urem_even(i27 %X) nounwind {
; RV64MV-LABEL: test_urem_even:
; RV64MV: # %bb.0:
; RV64MV-NEXT: lui a1, 28087
-; RV64MV-NEXT: addiw a1, a1, -585
+; RV64MV-NEXT: addi a1, a1, -585
; RV64MV-NEXT: mul a0, a0, a1
; RV64MV-NEXT: slli a1, a0, 26
; RV64MV-NEXT: slli a0, a0, 37
@@ -405,18 +405,18 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV64-NEXT: li a1, 1463
; RV64-NEXT: mv a0, s2
; RV64-NEXT:    call __muldi3@plt
-; RV64-NEXT: addiw a0, a0, -1463
+; RV64-NEXT: addi a0, a0, -1463
; RV64-NEXT: andi a0, a0, 2047
; RV64-NEXT: sltiu s2, a0, 293
; RV64-NEXT: li a1, 819
; RV64-NEXT: mv a0, s1
; RV64-NEXT:    call __muldi3@plt
-; RV64-NEXT: addiw a0, a0, -1638
+; RV64-NEXT: addi a0, a0, -1638
; RV64-NEXT: andi a0, a0, 2047
; RV64-NEXT: sltiu a0, a0, 2
-; RV64-NEXT: addiw s3, s3, -1
+; RV64-NEXT: addi s3, s3, -1
; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: addiw s2, s2, -1
+; RV64-NEXT: addi s2, s2, -1
; RV64-NEXT: andi a1, s3, 2047
; RV64-NEXT: andi a2, s2, 2047
; RV64-NEXT: slli a2, a2, 11
@@ -496,17 +496,17 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV64M-NEXT: sltiu a1, a1, 342
; RV64M-NEXT: li a4, 1463
; RV64M-NEXT: mul a3, a3, a4
-; RV64M-NEXT: addiw a3, a3, -1463
+; RV64M-NEXT: addi a3, a3, -1463
; RV64M-NEXT: andi a3, a3, 2047
; RV64M-NEXT: sltiu a3, a3, 293
; RV64M-NEXT: li a4, 819
; RV64M-NEXT: mul a2, a2, a4
-; RV64M-NEXT: addiw a2, a2, -1638
+; RV64M-NEXT: addi a2, a2, -1638
; RV64M-NEXT: andi a2, a2, 2047
; RV64M-NEXT: sltiu a2, a2, 2
-; RV64M-NEXT: addiw a1, a1, -1
+; RV64M-NEXT: addi a1, a1, -1
; RV64M-NEXT: addi a2, a2, -1
-; RV64M-NEXT: addiw a3, a3, -1
+; RV64M-NEXT: addi a3, a3, -1
; RV64M-NEXT: andi a1, a1, 2047
; RV64M-NEXT: andi a3, a3, 2047
; RV64M-NEXT: slli a3, a3, 11
@@ -604,7 +604,7 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV64MV-NEXT: vmul.vv v8, v8, v9
; RV64MV-NEXT: vadd.vv v9, v8, v8
; RV64MV-NEXT: lui a2, 41121
-; RV64MV-NEXT: addiw a2, a2, -1527
+; RV64MV-NEXT: addi a2, a2, -1527
; RV64MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64MV-NEXT: vmv.s.x v10, a2
; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
index a38ae17f19df385..ac67b9005b3d070 100644
--- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
@@ -755,7 +755,7 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
; RV64IM-NEXT: subw a1, a1, a3
; RV64IM-NEXT: mulhu a3, a4, a5
; RV64IM-NEXT: lui a5, 1
-; RV64IM-NEXT: addiw a5, a5, 1327
+; RV64IM-NEXT: addi a5, a5, 1327
; RV64IM-NEXT: mul a3, a3, a5
; RV64IM-NEXT: subw a4, a4, a3
; RV64IM-NEXT: sh zero, 0(a0)
diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll
index e8035dd226bf6ce..59aa1d9ae28932a 100644
--- a/llvm/test/CodeGen/RISCV/vararg.ll
+++ b/llvm/test/CodeGen/RISCV/vararg.ll
@@ -566,7 +566,7 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, sp, 24
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: lw a0, 8(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a0, a0, 7
+; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 7
; LP64-LP64F-LP64D-FPELIM-NEXT: slli a1, a0, 32
; LP64-LP64F-LP64D-FPELIM-NEXT: srli a1, a1, 32
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, a1, 8
@@ -593,7 +593,7 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 8
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -24(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: lw a0, -24(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a0, a0, 7
+; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 7
; LP64-LP64F-LP64D-WITHFP-NEXT: slli a1, a0, 32
; LP64-LP64F-LP64D-WITHFP-NEXT: srli a1, a1, 32
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, a1, 8
@@ -888,7 +888,7 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: lw a0, 8(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 16(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a0, a0, 7
+; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 7
; LP64-LP64F-LP64D-FPELIM-NEXT: slli a2, a0, 32
; LP64-LP64F-LP64D-FPELIM-NEXT: srli a2, a2, 32
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a2, a2, 8
@@ -915,7 +915,7 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -24(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: lw a0, -24(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 0(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a0, a0, 7
+; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 7
; LP64-LP64F-LP64D-WITHFP-NEXT: slli a2, a0, 32
; LP64-LP64F-LP64D-WITHFP-NEXT: srli a2, a2, 32
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a2, a2, 8
diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index 754237c3456dc03..85d28122537ea9c 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -193,7 +193,7 @@ define zeroext i1 @saddo4.i32(i32 signext %v1, ptr %res) {
; RV64-LABEL: saddo4.i32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: lui a2, 4096
-; RV64-NEXT: addiw a2, a2, -1
+; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: addw a2, a0, a2
; RV64-NEXT: slt a0, a2, a0
; RV64-NEXT: sw a2, 0(a1)
@@ -211,7 +211,7 @@ define zeroext i1 @saddo4.i32(i32 signext %v1, ptr %res) {
; RV64ZBA-LABEL: saddo4.i32:
; RV64ZBA: # %bb.0: # %entry
; RV64ZBA-NEXT: lui a2, 4096
-; RV64ZBA-NEXT: addiw a2, a2, -1
+; RV64ZBA-NEXT: addi a2, a2, -1
; RV64ZBA-NEXT: addw a2, a0, a2
; RV64ZBA-NEXT: slt a0, a2, a0
; RV64ZBA-NEXT: sw a2, 0(a1)
@@ -229,7 +229,7 @@ define zeroext i1 @saddo4.i32(i32 signext %v1, ptr %res) {
; RV64ZICOND-LABEL: saddo4.i32:
; RV64ZICOND: # %bb.0: # %entry
; RV64ZICOND-NEXT: lui a2, 4096
-; RV64ZICOND-NEXT: addiw a2, a2, -1
+; RV64ZICOND-NEXT: addi a2, a2, -1
; RV64ZICOND-NEXT: addw a2, a0, a2
; RV64ZICOND-NEXT: slt a0, a2, a0
; RV64ZICOND-NEXT: sw a2, 0(a1)