[llvm] [RISCV] Remove -riscv-v-fixed-length-vector-lmul-max from tests. NFC (PR #78299)

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 16 07:52:48 PST 2024


https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/78299

Some fixed vector tests in test/CodeGen/RISCV/rvv have multiple RUN lines that
check various configurations of -riscv-v-fixed-length-vector-lmul-max. From
what I understand, this flag was introduced in the early days of fixed-length
vector support, but now that fixed vector codegen has matured I'm not sure it's
still as relevant today.
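
As an example of what the flag controls, here is its effect on the abs_v32i8
test in fixed-vectors-abs.ll (taken verbatim from the diff below): with
lmul-max=2 the <32 x i8> operation is lowered as a single LMUL=2 op, while with
lmul-max=1 it is split into two LMUL=1 halves.

; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a1, 32
; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vrsub.vi v10, v8, 0
; LMULMAX2-NEXT:    vmax.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret

; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a1)
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a0)
; LMULMAX1-RV32-NEXT:    vrsub.vi v10, v8, 0
; LMULMAX1-RV32-NEXT:    vmax.vv v8, v8, v10
; LMULMAX1-RV32-NEXT:    vrsub.vi v10, v9, 0
; LMULMAX1-RV32-NEXT:    vmax.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a1)
; LMULMAX1-RV32-NEXT:    ret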

This patch proposes to remove the various lmul-max RUN lines from the tests to
make them more readable and to make any changes to fixed vector codegen easier
to review.

We have removed them before for the same reason, so this would take care of the
remaining test cases: https://reviews.llvm.org/D157973#4593268

(I don't have any strong motivation to remove the actual flag itself; my own
personal motivation is just to clean up the tests.)
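
For example, the RUN lines in fixed-vectors-abs.ll go from four RUN lines
covering the lmul-max=2 and lmul-max=1 configurations down to the two plain
rv32/rv64 RUN lines (see the first hunk of the diff below):

Before:
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64

After:
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s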


>From c1abec3de1c2a43b4fcf2a5fdce454bea309bb56 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 16 Jan 2024 22:37:23 +0700
Subject: [PATCH] [RISCV] Remove -riscv-v-fixed-length-vector-lmul-max from
 tests. NFC

Some fixed vector tests in test/CodeGen/RISCV/rvv have multiple RUN lines that
check various configurations of -riscv-v-fixed-length-vector-lmul-max. From
what I understand, this flag was introduced in the early days of fixed-length
vector support, but now that fixed vector codegen has matured I'm not sure it's
still as relevant today.

This patch proposes to remove the various lmul-max RUN lines from the tests to
make them more readable and to make any changes to fixed vector codegen easier
to review.

We have removed them before for the same reason, so this would take care of the
remaining test cases: https://reviews.llvm.org/D157973#4593268

(I don't have any strong motivation to remove the actual flag itself; my own
personal motivation is just to clean up the tests.)
---
 .../CodeGen/RISCV/rvv/fixed-vectors-abs.ll    |  184 +-
 .../RISCV/rvv/fixed-vectors-bitreverse.ll     | 1129 +---
 .../CodeGen/RISCV/rvv/fixed-vectors-bswap.ll  |  450 +-
 .../rvv/fixed-vectors-calling-conv-fastcc.ll  |  541 +-
 .../RISCV/rvv/fixed-vectors-calling-conv.ll   | 1642 +-----
 .../CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll   | 3424 +++++-------
 .../CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll  | 1177 +---
 .../CodeGen/RISCV/rvv/fixed-vectors-cttz.ll   | 3300 +++++------
 .../rvv/fixed-vectors-extload-truncstore.ll   | 1476 +----
 .../RISCV/rvv/fixed-vectors-fp-conv.ll        |  167 +-
 .../RISCV/rvv/fixed-vectors-fp-splat.ll       |  234 +-
 .../RISCV/rvv/fixed-vectors-fp-vrgather.ll    |  117 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll     | 4860 ++++++-----------
 .../CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll   |  972 +---
 .../CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll   |  906 +--
 .../rvv/fixed-vectors-insert-subvector.ll     |  258 +-
 .../RISCV/rvv/fixed-vectors-int-exttrunc.ll   |  183 +-
 .../RISCV/rvv/fixed-vectors-int-splat.ll      |  827 +--
 .../RISCV/rvv/fixed-vectors-int-vrgather.ll   |  121 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-int.ll    | 4198 ++++----------
 .../RISCV/rvv/fixed-vectors-mask-buildvec.ll  |  468 +-
 .../rvv/fixed-vectors-mask-load-store.ll      |    6 +-
 .../RISCV/rvv/fixed-vectors-mask-splat.ll     |  125 +-
 .../RISCV/rvv/fixed-vectors-stepvector.ll     |  294 +-
 .../rvv/fixed-vectors-vreductions-mask.ll     |  386 +-
 25 files changed, 7653 insertions(+), 19792 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
index 60561262d845d1..37d05f08d0ff3d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define void @abs_v16i8(ptr %x) {
 ; CHECK-LABEL: abs_v16i8:
@@ -87,43 +85,15 @@ define void @abs_v2i64(ptr %x) {
 declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
 
 define void @abs_v32i8(ptr %x) {
-; LMULMAX2-LABEL: abs_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a1, 32
-; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-NEXT:    vmax.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: abs_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX1-RV32-NEXT:    vmax.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vrsub.vi v10, v9, 0
-; LMULMAX1-RV32-NEXT:    vmax.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: abs_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX1-RV64-NEXT:    vmax.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vrsub.vi v10, v9, 0
-; LMULMAX1-RV64-NEXT:    vmax.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: abs_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vrsub.vi v10, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a, i1 false)
   store <32 x i8> %b, ptr %x
@@ -132,42 +102,14 @@ define void @abs_v32i8(ptr %x) {
 declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1)
 
 define void @abs_v16i16(ptr %x) {
-; LMULMAX2-LABEL: abs_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-NEXT:    vmax.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: abs_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX1-RV32-NEXT:    vmax.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vrsub.vi v10, v9, 0
-; LMULMAX1-RV32-NEXT:    vmax.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: abs_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX1-RV64-NEXT:    vmax.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vrsub.vi v10, v9, 0
-; LMULMAX1-RV64-NEXT:    vmax.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: abs_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vrsub.vi v10, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a, i1 false)
   store <16 x i16> %b, ptr %x
@@ -176,42 +118,14 @@ define void @abs_v16i16(ptr %x) {
 declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1)
 
 define void @abs_v8i32(ptr %x) {
-; LMULMAX2-LABEL: abs_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-NEXT:    vmax.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: abs_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX1-RV32-NEXT:    vmax.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vrsub.vi v10, v9, 0
-; LMULMAX1-RV32-NEXT:    vmax.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: abs_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX1-RV64-NEXT:    vmax.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vrsub.vi v10, v9, 0
-; LMULMAX1-RV64-NEXT:    vmax.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: abs_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vrsub.vi v10, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a, i1 false)
   store <8 x i32> %b, ptr %x
@@ -220,42 +134,14 @@ define void @abs_v8i32(ptr %x) {
 declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)
 
 define void @abs_v4i64(ptr %x) {
-; LMULMAX2-LABEL: abs_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-NEXT:    vmax.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: abs_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX1-RV32-NEXT:    vmax.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vrsub.vi v10, v9, 0
-; LMULMAX1-RV32-NEXT:    vmax.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: abs_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX1-RV64-NEXT:    vmax.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vrsub.vi v10, v9, 0
-; LMULMAX1-RV64-NEXT:    vmax.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: abs_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vrsub.vi v10, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a, i1 false)
   store <4 x i64> %b, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
index 1957829b1288ab..012f943b35d98e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
@@ -1,73 +1,40 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32,LMULMAX2-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64,LMULMAX2-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64,LMULMAX1-RV64
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
 
 define void @bitreverse_v8i16(ptr %x, ptr %y) {
-; RV32-LABEL: bitreverse_v8i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    vsll.vi v8, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    lui a1, 1
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vand.vx v9, v9, a1
-; RV32-NEXT:    vand.vx v8, v8, a1
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    lui a1, 3
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vand.vx v9, v9, a1
-; RV32-NEXT:    vand.vx v8, v8, a1
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vand.vx v9, v9, a1
-; RV32-NEXT:    vand.vx v8, v8, a1
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: bitreverse_v8i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    vsll.vi v8, v8, 8
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    lui a1, 1
-; RV64-NEXT:    addi a1, a1, -241
-; RV64-NEXT:    vand.vx v9, v9, a1
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    lui a1, 3
-; RV64-NEXT:    addi a1, a1, 819
-; RV64-NEXT:    vand.vx v9, v9, a1
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a1, 5
-; RV64-NEXT:    addi a1, a1, 1365
-; RV64-NEXT:    vand.vx v9, v9, a1
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vse16.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: bitreverse_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    lui a1, 1
+; CHECK-NEXT:    addi a1, a1, -241
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    lui a1, 3
+; CHECK-NEXT:    addi a1, a1, 819
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a1, 5
+; CHECK-NEXT:    addi a1, a1, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: bitreverse_v8i16:
 ; ZVBB:       # %bb.0:
@@ -85,83 +52,44 @@ define void @bitreverse_v8i16(ptr %x, ptr %y) {
 declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)
 
 define void @bitreverse_v4i32(ptr %x, ptr %y) {
-; RV32-LABEL: bitreverse_v4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    vsrl.vi v9, v8, 8
-; RV32-NEXT:    lui a1, 16
-; RV32-NEXT:    addi a1, a1, -256
-; RV32-NEXT:    vand.vx v9, v9, a1
-; RV32-NEXT:    vsrl.vi v10, v8, 24
-; RV32-NEXT:    vor.vv v9, v9, v10
-; RV32-NEXT:    vand.vx v10, v8, a1
-; RV32-NEXT:    vsll.vi v10, v10, 8
-; RV32-NEXT:    vsll.vi v8, v8, 24
-; RV32-NEXT:    vor.vv v8, v8, v10
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vand.vx v9, v9, a1
-; RV32-NEXT:    vand.vx v8, v8, a1
-; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 2
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vand.vx v9, v9, a1
-; RV32-NEXT:    vand.vx v8, v8, a1
-; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vand.vx v9, v9, a1
-; RV32-NEXT:    vand.vx v8, v8, a1
-; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v9, v8
-; RV32-NEXT:    vse32.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: bitreverse_v4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    vsrl.vi v9, v8, 8
-; RV64-NEXT:    lui a1, 16
-; RV64-NEXT:    addi a1, a1, -256
-; RV64-NEXT:    vand.vx v9, v9, a1
-; RV64-NEXT:    vsrl.vi v10, v8, 24
-; RV64-NEXT:    vor.vv v9, v9, v10
-; RV64-NEXT:    vand.vx v10, v8, a1
-; RV64-NEXT:    vsll.vi v10, v10, 8
-; RV64-NEXT:    vsll.vi v8, v8, 24
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    lui a1, 61681
-; RV64-NEXT:    addi a1, a1, -241
-; RV64-NEXT:    vand.vx v9, v9, a1
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vsll.vi v8, v8, 4
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 2
-; RV64-NEXT:    lui a1, 209715
-; RV64-NEXT:    addi a1, a1, 819
-; RV64-NEXT:    vand.vx v9, v9, a1
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vsll.vi v8, v8, 2
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a1, 349525
-; RV64-NEXT:    addi a1, a1, 1365
-; RV64-NEXT:    vand.vx v9, v9, a1
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: bitreverse_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    lui a1, 16
+; CHECK-NEXT:    addi a1, a1, -256
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vsrl.vi v10, v8, 24
+; CHECK-NEXT:    vor.vv v9, v9, v10
+; CHECK-NEXT:    vand.vx v10, v8, a1
+; CHECK-NEXT:    vsll.vi v10, v10, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    lui a1, 61681
+; CHECK-NEXT:    addi a1, a1, -241
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    lui a1, 209715
+; CHECK-NEXT:    addi a1, a1, 819
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    lui a1, 349525
+; CHECK-NEXT:    addi a1, a1, 1365
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: bitreverse_v4i32:
 ; ZVBB:       # %bb.0:
@@ -328,171 +256,36 @@ define void @bitreverse_v2i64(ptr %x, ptr %y) {
 declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>)
 
 define void @bitreverse_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-RV32-LABEL: bitreverse_v16i16:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV32-NEXT:    vsll.vi v8, v8, 8
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT:    lui a1, 1
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vsll.vi v8, v8, 4
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV32-NEXT:    lui a1, 3
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vsll.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT:    lui a1, 5
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v8
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: bitreverse_v16i16:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV64-NEXT:    vsll.vi v8, v8, 8
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT:    lui a1, 1
-; LMULMAX2-RV64-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vsll.vi v8, v8, 4
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV64-NEXT:    lui a1, 3
-; LMULMAX2-RV64-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vsll.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT:    lui a1, 5
-; LMULMAX2-RV64-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v8
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: bitreverse_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX1-RV32-NEXT:    vsll.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT:    lui a2, 1
-; LMULMAX1-RV32-NEXT:    addi a2, a2, -241
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a2
-; LMULMAX1-RV32-NEXT:    vsll.vi v8, v8, 4
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX1-RV32-NEXT:    lui a3, 3
-; LMULMAX1-RV32-NEXT:    addi a3, a3, 819
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-RV32-NEXT:    vsll.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT:    lui a4, 5
-; LMULMAX1-RV32-NEXT:    addi a4, a4, 1365
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a4
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v8
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 8
-; LMULMAX1-RV32-NEXT:    vsll.vi v9, v9, 8
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a2
-; LMULMAX1-RV32-NEXT:    vsll.vi v9, v9, 4
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-RV32-NEXT:    vsll.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a4
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v9
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: bitreverse_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT:    lui a2, 1
-; LMULMAX1-RV64-NEXT:    addi a2, a2, -241
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a2
-; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 4
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX1-RV64-NEXT:    lui a3, 3
-; LMULMAX1-RV64-NEXT:    addi a3, a3, 819
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT:    lui a4, 5
-; LMULMAX1-RV64-NEXT:    addi a4, a4, 1365
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a4
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v8
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 8
-; LMULMAX1-RV64-NEXT:    vsll.vi v9, v9, 8
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a2
-; LMULMAX1-RV64-NEXT:    vsll.vi v9, v9, 4
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-RV64-NEXT:    vsll.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a4
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v9
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: bitreverse_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    lui a1, 1
+; CHECK-NEXT:    addi a1, a1, -241
+; CHECK-NEXT:    vand.vx v10, v10, a1
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 2
+; CHECK-NEXT:    lui a1, 3
+; CHECK-NEXT:    addi a1, a1, 819
+; CHECK-NEXT:    vand.vx v10, v10, a1
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a1, 5
+; CHECK-NEXT:    addi a1, a1, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a1
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: bitreverse_v16i16:
 ; ZVBB:       # %bb.0:
@@ -510,215 +303,44 @@ define void @bitreverse_v16i16(ptr %x, ptr %y) {
 declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>)
 
 define void @bitreverse_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-RV32-LABEL: bitreverse_v8i32:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV32-NEXT:    lui a1, 16
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -256
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v12, v8, 24
-; LMULMAX2-RV32-NEXT:    vor.vv v10, v10, v12
-; LMULMAX2-RV32-NEXT:    vand.vx v12, v8, a1
-; LMULMAX2-RV32-NEXT:    vsll.vi v12, v12, 8
-; LMULMAX2-RV32-NEXT:    vsll.vi v8, v8, 24
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT:    lui a1, 61681
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vsll.vi v8, v8, 4
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV32-NEXT:    lui a1, 209715
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vsll.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT:    lui a1, 349525
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v8
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: bitreverse_v8i32:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV64-NEXT:    lui a1, 16
-; LMULMAX2-RV64-NEXT:    addi a1, a1, -256
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v12, v8, 24
-; LMULMAX2-RV64-NEXT:    vor.vv v10, v10, v12
-; LMULMAX2-RV64-NEXT:    vand.vx v12, v8, a1
-; LMULMAX2-RV64-NEXT:    vsll.vi v12, v12, 8
-; LMULMAX2-RV64-NEXT:    vsll.vi v8, v8, 24
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v12
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT:    lui a1, 61681
-; LMULMAX2-RV64-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vsll.vi v8, v8, 4
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV64-NEXT:    lui a1, 209715
-; LMULMAX2-RV64-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vsll.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT:    lui a1, 349525
-; LMULMAX2-RV64-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v8
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: bitreverse_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX1-RV32-NEXT:    lui a2, 16
-; LMULMAX1-RV32-NEXT:    addi a2, a2, -256
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT:    vsrl.vi v11, v8, 24
-; LMULMAX1-RV32-NEXT:    vor.vv v10, v10, v11
-; LMULMAX1-RV32-NEXT:    vand.vx v11, v8, a2
-; LMULMAX1-RV32-NEXT:    vsll.vi v11, v11, 8
-; LMULMAX1-RV32-NEXT:    vsll.vi v8, v8, 24
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT:    lui a3, 61681
-; LMULMAX1-RV32-NEXT:    addi a3, a3, -241
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-RV32-NEXT:    vsll.vi v8, v8, 4
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX1-RV32-NEXT:    lui a4, 209715
-; LMULMAX1-RV32-NEXT:    addi a4, a4, 819
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a4
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV32-NEXT:    vsll.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT:    lui a5, 349525
-; LMULMAX1-RV32-NEXT:    addi a5, a5, 1365
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a5
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a5
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v8
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 8
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT:    vsrl.vi v11, v9, 24
-; LMULMAX1-RV32-NEXT:    vor.vv v10, v10, v11
-; LMULMAX1-RV32-NEXT:    vand.vx v11, v9, a2
-; LMULMAX1-RV32-NEXT:    vsll.vi v11, v11, 8
-; LMULMAX1-RV32-NEXT:    vsll.vi v9, v9, 24
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v11
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-RV32-NEXT:    vsll.vi v9, v9, 4
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a4
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT:    vsll.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a5
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a5
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v9
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: bitreverse_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX1-RV64-NEXT:    lui a2, 16
-; LMULMAX1-RV64-NEXT:    addi a2, a2, -256
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vsrl.vi v11, v8, 24
-; LMULMAX1-RV64-NEXT:    vor.vv v10, v10, v11
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v8, a2
-; LMULMAX1-RV64-NEXT:    vsll.vi v11, v11, 8
-; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 24
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT:    lui a3, 61681
-; LMULMAX1-RV64-NEXT:    addi a3, a3, -241
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 4
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX1-RV64-NEXT:    lui a4, 209715
-; LMULMAX1-RV64-NEXT:    addi a4, a4, 819
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a4
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT:    lui a5, 349525
-; LMULMAX1-RV64-NEXT:    addi a5, a5, 1365
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a5
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v8
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 8
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vsrl.vi v11, v9, 24
-; LMULMAX1-RV64-NEXT:    vor.vv v10, v10, v11
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v9, a2
-; LMULMAX1-RV64-NEXT:    vsll.vi v11, v11, 8
-; LMULMAX1-RV64-NEXT:    vsll.vi v9, v9, 24
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v11
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-RV64-NEXT:    vsll.vi v9, v9, 4
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a4
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT:    vsll.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a5
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v9
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: bitreverse_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-NEXT:    lui a1, 16
+; CHECK-NEXT:    addi a1, a1, -256
+; CHECK-NEXT:    vand.vx v10, v10, a1
+; CHECK-NEXT:    vsrl.vi v12, v8, 24
+; CHECK-NEXT:    vor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a1
+; CHECK-NEXT:    vsll.vi v12, v12, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    lui a1, 61681
+; CHECK-NEXT:    addi a1, a1, -241
+; CHECK-NEXT:    vand.vx v10, v10, a1
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 2
+; CHECK-NEXT:    lui a1, 209715
+; CHECK-NEXT:    addi a1, a1, 819
+; CHECK-NEXT:    vand.vx v10, v10, a1
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a1, 349525
+; CHECK-NEXT:    addi a1, a1, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a1
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: bitreverse_v8i32:
 ; ZVBB:       # %bb.0:
@@ -736,349 +358,138 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) {
 declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>)
 
 define void @bitreverse_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-RV32-LABEL: bitreverse_v4i64:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    addi sp, sp, -16
-; LMULMAX2-RV32-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    sw zero, 12(sp)
-; LMULMAX2-RV32-NEXT:    lui a1, 1044480
-; LMULMAX2-RV32-NEXT:    sw a1, 8(sp)
-; LMULMAX2-RV32-NEXT:    li a1, 56
-; LMULMAX2-RV32-NEXT:    vsrl.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT:    li a2, 40
-; LMULMAX2-RV32-NEXT:    vsrl.vx v12, v8, a2
-; LMULMAX2-RV32-NEXT:    lui a3, 16
-; LMULMAX2-RV32-NEXT:    addi a3, a3, -256
-; LMULMAX2-RV32-NEXT:    vand.vx v12, v12, a3
-; LMULMAX2-RV32-NEXT:    vor.vv v10, v12, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vi v12, v8, 24
-; LMULMAX2-RV32-NEXT:    addi a4, sp, 8
-; LMULMAX2-RV32-NEXT:    vlse64.v v14, (a4), zero
-; LMULMAX2-RV32-NEXT:    lui a4, 4080
-; LMULMAX2-RV32-NEXT:    vand.vx v12, v12, a4
-; LMULMAX2-RV32-NEXT:    vsrl.vi v16, v8, 8
-; LMULMAX2-RV32-NEXT:    vand.vv v16, v16, v14
-; LMULMAX2-RV32-NEXT:    vor.vv v12, v16, v12
-; LMULMAX2-RV32-NEXT:    vor.vv v10, v12, v10
-; LMULMAX2-RV32-NEXT:    vsll.vx v12, v8, a1
-; LMULMAX2-RV32-NEXT:    vand.vx v16, v8, a3
-; LMULMAX2-RV32-NEXT:    vsll.vx v16, v16, a2
-; LMULMAX2-RV32-NEXT:    vor.vv v12, v12, v16
-; LMULMAX2-RV32-NEXT:    vand.vx v16, v8, a4
-; LMULMAX2-RV32-NEXT:    vsll.vi v16, v16, 24
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v14
-; LMULMAX2-RV32-NEXT:    vsll.vi v8, v8, 8
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v16, v8
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v12, v8
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT:    lui a1, 61681
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.x v12, a1
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vand.vv v10, v10, v12
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT:    vsll.vi v8, v8, 4
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV32-NEXT:    lui a1, 209715
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.x v12, a1
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vand.vv v10, v10, v12
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT:    vsll.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT:    lui a1, 349525
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.x v12, a1
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vand.vv v10, v10, v12
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v8
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    addi sp, sp, 16
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: bitreverse_v4i64:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    li a1, 56
-; LMULMAX2-RV64-NEXT:    vsrl.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT:    li a2, 40
-; LMULMAX2-RV64-NEXT:    vsrl.vx v12, v8, a2
-; LMULMAX2-RV64-NEXT:    lui a3, 16
-; LMULMAX2-RV64-NEXT:    addiw a3, a3, -256
-; LMULMAX2-RV64-NEXT:    vand.vx v12, v12, a3
-; LMULMAX2-RV64-NEXT:    vor.vv v10, v12, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vi v12, v8, 24
-; LMULMAX2-RV64-NEXT:    lui a4, 4080
-; LMULMAX2-RV64-NEXT:    vand.vx v12, v12, a4
-; LMULMAX2-RV64-NEXT:    vsrl.vi v14, v8, 8
-; LMULMAX2-RV64-NEXT:    li a5, 255
-; LMULMAX2-RV64-NEXT:    slli a5, a5, 24
-; LMULMAX2-RV64-NEXT:    vand.vx v14, v14, a5
-; LMULMAX2-RV64-NEXT:    vor.vv v12, v14, v12
-; LMULMAX2-RV64-NEXT:    vor.vv v10, v12, v10
-; LMULMAX2-RV64-NEXT:    vand.vx v12, v8, a5
-; LMULMAX2-RV64-NEXT:    vsll.vi v12, v12, 8
-; LMULMAX2-RV64-NEXT:    vand.vx v14, v8, a4
-; LMULMAX2-RV64-NEXT:    vsll.vi v14, v14, 24
-; LMULMAX2-RV64-NEXT:    vor.vv v12, v14, v12
-; LMULMAX2-RV64-NEXT:    vsll.vx v14, v8, a1
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a3
-; LMULMAX2-RV64-NEXT:    vsll.vx v8, v8, a2
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v14, v8
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v12
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT:    lui a1, 61681
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vsll.vi v8, v8, 4
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV64-NEXT:    lui a1, 209715
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vsll.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT:    lui a1, 349525
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v8
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: bitreverse_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    addi sp, sp, -16
-; LMULMAX1-RV32-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    sw zero, 12(sp)
-; LMULMAX1-RV32-NEXT:    lui a2, 1044480
-; LMULMAX1-RV32-NEXT:    sw a2, 8(sp)
-; LMULMAX1-RV32-NEXT:    li a2, 56
-; LMULMAX1-RV32-NEXT:    vsrl.vx v10, v9, a2
-; LMULMAX1-RV32-NEXT:    li a3, 40
-; LMULMAX1-RV32-NEXT:    vsrl.vx v11, v9, a3
-; LMULMAX1-RV32-NEXT:    lui a4, 16
-; LMULMAX1-RV32-NEXT:    addi a4, a4, -256
-; LMULMAX1-RV32-NEXT:    vand.vx v11, v11, a4
-; LMULMAX1-RV32-NEXT:    vor.vv v10, v11, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v11, v9, 24
-; LMULMAX1-RV32-NEXT:    addi a5, sp, 8
-; LMULMAX1-RV32-NEXT:    vlse64.v v12, (a5), zero
-; LMULMAX1-RV32-NEXT:    lui a5, 4080
-; LMULMAX1-RV32-NEXT:    vand.vx v11, v11, a5
-; LMULMAX1-RV32-NEXT:    vsrl.vi v13, v9, 8
-; LMULMAX1-RV32-NEXT:    vand.vv v13, v13, v12
-; LMULMAX1-RV32-NEXT:    vor.vv v11, v13, v11
-; LMULMAX1-RV32-NEXT:    vor.vv v10, v11, v10
-; LMULMAX1-RV32-NEXT:    vand.vv v11, v9, v12
-; LMULMAX1-RV32-NEXT:    vsll.vi v11, v11, 8
-; LMULMAX1-RV32-NEXT:    vand.vx v13, v9, a5
-; LMULMAX1-RV32-NEXT:    vsll.vi v13, v13, 24
-; LMULMAX1-RV32-NEXT:    vor.vv v11, v13, v11
-; LMULMAX1-RV32-NEXT:    vsll.vx v13, v9, a2
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT:    vsll.vx v9, v9, a3
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v13, v9
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v11
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT:    lui a6, 61681
-; LMULMAX1-RV32-NEXT:    addi a6, a6, -241
-; LMULMAX1-RV32-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.x v11, a6
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vand.vv v10, v10, v11
-; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v11
-; LMULMAX1-RV32-NEXT:    vsll.vi v9, v9, 4
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 2
-; LMULMAX1-RV32-NEXT:    lui a6, 209715
-; LMULMAX1-RV32-NEXT:    addi a6, a6, 819
-; LMULMAX1-RV32-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.x v13, a6
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vand.vv v10, v10, v13
-; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v13
-; LMULMAX1-RV32-NEXT:    vsll.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT:    lui a6, 349525
-; LMULMAX1-RV32-NEXT:    addi a6, a6, 1365
-; LMULMAX1-RV32-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.x v14, a6
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vand.vv v10, v10, v14
-; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v14
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v9
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vx v10, v8, a2
-; LMULMAX1-RV32-NEXT:    vsrl.vx v15, v8, a3
-; LMULMAX1-RV32-NEXT:    vand.vx v15, v15, a4
-; LMULMAX1-RV32-NEXT:    vor.vv v10, v15, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v15, v8, 24
-; LMULMAX1-RV32-NEXT:    vand.vx v15, v15, a5
-; LMULMAX1-RV32-NEXT:    vsrl.vi v16, v8, 8
-; LMULMAX1-RV32-NEXT:    vand.vv v16, v16, v12
-; LMULMAX1-RV32-NEXT:    vor.vv v15, v16, v15
-; LMULMAX1-RV32-NEXT:    vor.vv v10, v15, v10
-; LMULMAX1-RV32-NEXT:    vsll.vx v15, v8, a2
-; LMULMAX1-RV32-NEXT:    vand.vx v16, v8, a4
-; LMULMAX1-RV32-NEXT:    vsll.vx v16, v16, a3
-; LMULMAX1-RV32-NEXT:    vor.vv v15, v15, v16
-; LMULMAX1-RV32-NEXT:    vand.vx v16, v8, a5
-; LMULMAX1-RV32-NEXT:    vsll.vi v16, v16, 24
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v12
-; LMULMAX1-RV32-NEXT:    vsll.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v16, v8
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v15, v8
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT:    vand.vv v10, v10, v11
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vsll.vi v8, v8, 4
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vv v10, v10, v13
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v13
-; LMULMAX1-RV32-NEXT:    vsll.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT:    vand.vv v10, v10, v14
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v14
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v8
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    addi sp, sp, 16
-; LMULMAX1-RV32-NEXT:    ret
+; RV32-LABEL: bitreverse_v4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    sw zero, 12(sp)
+; RV32-NEXT:    lui a1, 1044480
+; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    li a1, 56
+; RV32-NEXT:    vsrl.vx v10, v8, a1
+; RV32-NEXT:    li a2, 40
+; RV32-NEXT:    vsrl.vx v12, v8, a2
+; RV32-NEXT:    lui a3, 16
+; RV32-NEXT:    addi a3, a3, -256
+; RV32-NEXT:    vand.vx v12, v12, a3
+; RV32-NEXT:    vor.vv v10, v12, v10
+; RV32-NEXT:    vsrl.vi v12, v8, 24
+; RV32-NEXT:    addi a4, sp, 8
+; RV32-NEXT:    vlse64.v v14, (a4), zero
+; RV32-NEXT:    lui a4, 4080
+; RV32-NEXT:    vand.vx v12, v12, a4
+; RV32-NEXT:    vsrl.vi v16, v8, 8
+; RV32-NEXT:    vand.vv v16, v16, v14
+; RV32-NEXT:    vor.vv v12, v16, v12
+; RV32-NEXT:    vor.vv v10, v12, v10
+; RV32-NEXT:    vsll.vx v12, v8, a1
+; RV32-NEXT:    vand.vx v16, v8, a3
+; RV32-NEXT:    vsll.vx v16, v16, a2
+; RV32-NEXT:    vor.vv v12, v12, v16
+; RV32-NEXT:    vand.vx v16, v8, a4
+; RV32-NEXT:    vsll.vi v16, v16, 24
+; RV32-NEXT:    vand.vv v8, v8, v14
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vor.vv v8, v8, v10
+; RV32-NEXT:    vsrl.vi v10, v8, 4
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vand.vv v10, v10, v12
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vsll.vi v8, v8, 4
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 2
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vand.vv v10, v10, v12
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 1
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vand.vv v10, v10, v12
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vor.vv v8, v10, v8
+; RV32-NEXT:    vse64.v v8, (a0)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
 ;
-; LMULMAX1-RV64-LABEL: bitreverse_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    li a2, 56
-; LMULMAX1-RV64-NEXT:    vsrl.vx v10, v9, a2
-; LMULMAX1-RV64-NEXT:    li a3, 40
-; LMULMAX1-RV64-NEXT:    vsrl.vx v11, v9, a3
-; LMULMAX1-RV64-NEXT:    lui a4, 16
-; LMULMAX1-RV64-NEXT:    addiw a4, a4, -256
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a4
-; LMULMAX1-RV64-NEXT:    vor.vv v10, v11, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v11, v9, 24
-; LMULMAX1-RV64-NEXT:    lui a5, 4080
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a5
-; LMULMAX1-RV64-NEXT:    vsrl.vi v12, v9, 8
-; LMULMAX1-RV64-NEXT:    li a6, 255
-; LMULMAX1-RV64-NEXT:    slli a6, a6, 24
-; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, a6
-; LMULMAX1-RV64-NEXT:    vor.vv v11, v12, v11
-; LMULMAX1-RV64-NEXT:    vor.vv v10, v11, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v9, a6
-; LMULMAX1-RV64-NEXT:    vsll.vi v11, v11, 8
-; LMULMAX1-RV64-NEXT:    vand.vx v12, v9, a5
-; LMULMAX1-RV64-NEXT:    vsll.vi v12, v12, 24
-; LMULMAX1-RV64-NEXT:    vor.vv v11, v12, v11
-; LMULMAX1-RV64-NEXT:    vsll.vx v12, v9, a2
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT:    vsll.vx v9, v9, a3
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v12, v9
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v11
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT:    lui a7, 61681
-; LMULMAX1-RV64-NEXT:    addiw a7, a7, -241
-; LMULMAX1-RV64-NEXT:    slli t0, a7, 32
-; LMULMAX1-RV64-NEXT:    add a7, a7, t0
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a7
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a7
-; LMULMAX1-RV64-NEXT:    vsll.vi v9, v9, 4
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 2
-; LMULMAX1-RV64-NEXT:    lui t0, 209715
-; LMULMAX1-RV64-NEXT:    addiw t0, t0, 819
-; LMULMAX1-RV64-NEXT:    slli t1, t0, 32
-; LMULMAX1-RV64-NEXT:    add t0, t0, t1
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, t0
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, t0
-; LMULMAX1-RV64-NEXT:    vsll.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT:    lui t1, 349525
-; LMULMAX1-RV64-NEXT:    addiw t1, t1, 1365
-; LMULMAX1-RV64-NEXT:    slli t2, t1, 32
-; LMULMAX1-RV64-NEXT:    add t1, t1, t2
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, t1
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, t1
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v9
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vx v10, v8, a2
-; LMULMAX1-RV64-NEXT:    vsrl.vx v11, v8, a3
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a4
-; LMULMAX1-RV64-NEXT:    vor.vv v10, v11, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v11, v8, 24
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a5
-; LMULMAX1-RV64-NEXT:    vsrl.vi v12, v8, 8
-; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, a6
-; LMULMAX1-RV64-NEXT:    vor.vv v11, v12, v11
-; LMULMAX1-RV64-NEXT:    vor.vv v10, v11, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v8, a6
-; LMULMAX1-RV64-NEXT:    vsll.vi v11, v11, 8
-; LMULMAX1-RV64-NEXT:    vand.vx v12, v8, a5
-; LMULMAX1-RV64-NEXT:    vsll.vi v12, v12, 24
-; LMULMAX1-RV64-NEXT:    vor.vv v11, v12, v11
-; LMULMAX1-RV64-NEXT:    vsll.vx v12, v8, a2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT:    vsll.vx v8, v8, a3
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v12, v8
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a7
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a7
-; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 4
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, t0
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, t0
-; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, t1
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, t1
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v8
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; RV64-LABEL: bitreverse_v4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    li a1, 56
+; RV64-NEXT:    vsrl.vx v10, v8, a1
+; RV64-NEXT:    li a2, 40
+; RV64-NEXT:    vsrl.vx v12, v8, a2
+; RV64-NEXT:    lui a3, 16
+; RV64-NEXT:    addiw a3, a3, -256
+; RV64-NEXT:    vand.vx v12, v12, a3
+; RV64-NEXT:    vor.vv v10, v12, v10
+; RV64-NEXT:    vsrl.vi v12, v8, 24
+; RV64-NEXT:    lui a4, 4080
+; RV64-NEXT:    vand.vx v12, v12, a4
+; RV64-NEXT:    vsrl.vi v14, v8, 8
+; RV64-NEXT:    li a5, 255
+; RV64-NEXT:    slli a5, a5, 24
+; RV64-NEXT:    vand.vx v14, v14, a5
+; RV64-NEXT:    vor.vv v12, v14, v12
+; RV64-NEXT:    vor.vv v10, v12, v10
+; RV64-NEXT:    vand.vx v12, v8, a5
+; RV64-NEXT:    vsll.vi v12, v12, 8
+; RV64-NEXT:    vand.vx v14, v8, a4
+; RV64-NEXT:    vsll.vi v14, v14, 24
+; RV64-NEXT:    vor.vv v12, v14, v12
+; RV64-NEXT:    vsll.vx v14, v8, a1
+; RV64-NEXT:    vand.vx v8, v8, a3
+; RV64-NEXT:    vsll.vx v8, v8, a2
+; RV64-NEXT:    vor.vv v8, v14, v8
+; RV64-NEXT:    vor.vv v8, v8, v12
+; RV64-NEXT:    vor.vv v8, v8, v10
+; RV64-NEXT:    vsrl.vi v10, v8, 4
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vand.vx v10, v10, a1
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsll.vi v8, v8, 4
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 2
+; RV64-NEXT:    lui a1, 209715
+; RV64-NEXT:    addiw a1, a1, 819
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vand.vx v10, v10, a1
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 1
+; RV64-NEXT:    lui a1, 349525
+; RV64-NEXT:    addiw a1, a1, 1365
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vand.vx v10, v10, a1
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vor.vv v8, v10, v8
+; RV64-NEXT:    vse64.v v8, (a0)
+; RV64-NEXT:    ret
 ;
 ; ZVBB-LABEL: bitreverse_v4i64:
 ; ZVBB:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
index 41f74255c31b06..d5338f9b6c6fc0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVKB
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVKB
 
@@ -159,57 +157,15 @@ define void @bswap_v2i64(ptr %x, ptr %y) {
 declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
 
 define void @bswap_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-RV32-LABEL: bswap_v16i16:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV32-NEXT:    vsll.vi v8, v8, 8
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: bswap_v16i16:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV64-NEXT:    vsll.vi v8, v8, 8
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: bswap_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX1-RV32-NEXT:    vsll.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 8
-; LMULMAX1-RV32-NEXT:    vsll.vi v9, v9, 8
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: bswap_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 8
-; LMULMAX1-RV64-NEXT:    vsll.vi v9, v9, 8
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: bswap_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
 ;
 ; ZVKB-LABEL: bswap_v16i16:
 ; ZVKB:       # %bb.0:
@@ -227,101 +183,23 @@ define void @bswap_v16i16(ptr %x, ptr %y) {
 declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
 
 define void @bswap_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-RV32-LABEL: bswap_v8i32:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV32-NEXT:    lui a1, 16
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -256
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v12, v8, 24
-; LMULMAX2-RV32-NEXT:    vor.vv v10, v10, v12
-; LMULMAX2-RV32-NEXT:    vand.vx v12, v8, a1
-; LMULMAX2-RV32-NEXT:    vsll.vi v12, v12, 8
-; LMULMAX2-RV32-NEXT:    vsll.vi v8, v8, 24
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: bswap_v8i32:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV64-NEXT:    lui a1, 16
-; LMULMAX2-RV64-NEXT:    addi a1, a1, -256
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v12, v8, 24
-; LMULMAX2-RV64-NEXT:    vor.vv v10, v10, v12
-; LMULMAX2-RV64-NEXT:    vand.vx v12, v8, a1
-; LMULMAX2-RV64-NEXT:    vsll.vi v12, v12, 8
-; LMULMAX2-RV64-NEXT:    vsll.vi v8, v8, 24
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v12
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: bswap_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX1-RV32-NEXT:    lui a2, 16
-; LMULMAX1-RV32-NEXT:    addi a2, a2, -256
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT:    vsrl.vi v11, v8, 24
-; LMULMAX1-RV32-NEXT:    vor.vv v10, v10, v11
-; LMULMAX1-RV32-NEXT:    vand.vx v11, v8, a2
-; LMULMAX1-RV32-NEXT:    vsll.vi v11, v11, 8
-; LMULMAX1-RV32-NEXT:    vsll.vi v8, v8, 24
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 8
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT:    vsrl.vi v11, v9, 24
-; LMULMAX1-RV32-NEXT:    vor.vv v10, v10, v11
-; LMULMAX1-RV32-NEXT:    vand.vx v11, v9, a2
-; LMULMAX1-RV32-NEXT:    vsll.vi v11, v11, 8
-; LMULMAX1-RV32-NEXT:    vsll.vi v9, v9, 24
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v11
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: bswap_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX1-RV64-NEXT:    lui a2, 16
-; LMULMAX1-RV64-NEXT:    addi a2, a2, -256
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vsrl.vi v11, v8, 24
-; LMULMAX1-RV64-NEXT:    vor.vv v10, v10, v11
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v8, a2
-; LMULMAX1-RV64-NEXT:    vsll.vi v11, v11, 8
-; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 24
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 8
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vsrl.vi v11, v9, 24
-; LMULMAX1-RV64-NEXT:    vor.vv v10, v10, v11
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v9, a2
-; LMULMAX1-RV64-NEXT:    vsll.vi v11, v11, 8
-; LMULMAX1-RV64-NEXT:    vsll.vi v9, v9, 24
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v11
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: bswap_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsrl.vi v10, v8, 8
+; CHECK-NEXT:    lui a1, 16
+; CHECK-NEXT:    addi a1, a1, -256
+; CHECK-NEXT:    vand.vx v10, v10, a1
+; CHECK-NEXT:    vsrl.vi v12, v8, 24
+; CHECK-NEXT:    vor.vv v10, v10, v12
+; CHECK-NEXT:    vand.vx v12, v8, a1
+; CHECK-NEXT:    vsll.vi v12, v12, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 24
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
 ;
 ; ZVKB-LABEL: bswap_v8i32:
 ; ZVKB:       # %bb.0:
@@ -339,205 +217,81 @@ define void @bswap_v8i32(ptr %x, ptr %y) {
 declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
 
 define void @bswap_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-RV32-LABEL: bswap_v4i64:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    addi sp, sp, -16
-; LMULMAX2-RV32-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    sw zero, 12(sp)
-; LMULMAX2-RV32-NEXT:    lui a1, 1044480
-; LMULMAX2-RV32-NEXT:    sw a1, 8(sp)
-; LMULMAX2-RV32-NEXT:    li a1, 56
-; LMULMAX2-RV32-NEXT:    vsrl.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT:    li a2, 40
-; LMULMAX2-RV32-NEXT:    vsrl.vx v12, v8, a2
-; LMULMAX2-RV32-NEXT:    lui a3, 16
-; LMULMAX2-RV32-NEXT:    addi a3, a3, -256
-; LMULMAX2-RV32-NEXT:    vand.vx v12, v12, a3
-; LMULMAX2-RV32-NEXT:    vor.vv v10, v12, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vi v12, v8, 24
-; LMULMAX2-RV32-NEXT:    addi a4, sp, 8
-; LMULMAX2-RV32-NEXT:    vlse64.v v14, (a4), zero
-; LMULMAX2-RV32-NEXT:    lui a4, 4080
-; LMULMAX2-RV32-NEXT:    vand.vx v12, v12, a4
-; LMULMAX2-RV32-NEXT:    vsrl.vi v16, v8, 8
-; LMULMAX2-RV32-NEXT:    vand.vv v16, v16, v14
-; LMULMAX2-RV32-NEXT:    vor.vv v12, v16, v12
-; LMULMAX2-RV32-NEXT:    vor.vv v10, v12, v10
-; LMULMAX2-RV32-NEXT:    vsll.vx v12, v8, a1
-; LMULMAX2-RV32-NEXT:    vand.vx v16, v8, a3
-; LMULMAX2-RV32-NEXT:    vsll.vx v16, v16, a2
-; LMULMAX2-RV32-NEXT:    vor.vv v12, v12, v16
-; LMULMAX2-RV32-NEXT:    vand.vx v16, v8, a4
-; LMULMAX2-RV32-NEXT:    vsll.vi v16, v16, 24
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v14
-; LMULMAX2-RV32-NEXT:    vsll.vi v8, v8, 8
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v16, v8
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v12, v8
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    addi sp, sp, 16
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: bswap_v4i64:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    li a1, 56
-; LMULMAX2-RV64-NEXT:    vsrl.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT:    li a2, 40
-; LMULMAX2-RV64-NEXT:    vsrl.vx v12, v8, a2
-; LMULMAX2-RV64-NEXT:    lui a3, 16
-; LMULMAX2-RV64-NEXT:    addiw a3, a3, -256
-; LMULMAX2-RV64-NEXT:    vand.vx v12, v12, a3
-; LMULMAX2-RV64-NEXT:    vor.vv v10, v12, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vi v12, v8, 24
-; LMULMAX2-RV64-NEXT:    lui a4, 4080
-; LMULMAX2-RV64-NEXT:    vand.vx v12, v12, a4
-; LMULMAX2-RV64-NEXT:    vsrl.vi v14, v8, 8
-; LMULMAX2-RV64-NEXT:    li a5, 255
-; LMULMAX2-RV64-NEXT:    slli a5, a5, 24
-; LMULMAX2-RV64-NEXT:    vand.vx v14, v14, a5
-; LMULMAX2-RV64-NEXT:    vor.vv v12, v14, v12
-; LMULMAX2-RV64-NEXT:    vor.vv v10, v12, v10
-; LMULMAX2-RV64-NEXT:    vand.vx v12, v8, a5
-; LMULMAX2-RV64-NEXT:    vsll.vi v12, v12, 8
-; LMULMAX2-RV64-NEXT:    vand.vx v14, v8, a4
-; LMULMAX2-RV64-NEXT:    vsll.vi v14, v14, 24
-; LMULMAX2-RV64-NEXT:    vor.vv v12, v14, v12
-; LMULMAX2-RV64-NEXT:    vsll.vx v14, v8, a1
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a3
-; LMULMAX2-RV64-NEXT:    vsll.vx v8, v8, a2
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v14, v8
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v12
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: bswap_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    addi sp, sp, -16
-; LMULMAX1-RV32-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    sw zero, 12(sp)
-; LMULMAX1-RV32-NEXT:    lui a2, 1044480
-; LMULMAX1-RV32-NEXT:    sw a2, 8(sp)
-; LMULMAX1-RV32-NEXT:    li a2, 56
-; LMULMAX1-RV32-NEXT:    vsrl.vx v10, v9, a2
-; LMULMAX1-RV32-NEXT:    li a3, 40
-; LMULMAX1-RV32-NEXT:    vsrl.vx v11, v9, a3
-; LMULMAX1-RV32-NEXT:    lui a4, 16
-; LMULMAX1-RV32-NEXT:    addi a4, a4, -256
-; LMULMAX1-RV32-NEXT:    vand.vx v11, v11, a4
-; LMULMAX1-RV32-NEXT:    vor.vv v10, v11, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v11, v9, 24
-; LMULMAX1-RV32-NEXT:    addi a5, sp, 8
-; LMULMAX1-RV32-NEXT:    vlse64.v v12, (a5), zero
-; LMULMAX1-RV32-NEXT:    lui a5, 4080
-; LMULMAX1-RV32-NEXT:    vand.vx v11, v11, a5
-; LMULMAX1-RV32-NEXT:    vsrl.vi v13, v9, 8
-; LMULMAX1-RV32-NEXT:    vand.vv v13, v13, v12
-; LMULMAX1-RV32-NEXT:    vor.vv v11, v13, v11
-; LMULMAX1-RV32-NEXT:    vor.vv v10, v11, v10
-; LMULMAX1-RV32-NEXT:    vand.vv v11, v9, v12
-; LMULMAX1-RV32-NEXT:    vsll.vi v11, v11, 8
-; LMULMAX1-RV32-NEXT:    vand.vx v13, v9, a5
-; LMULMAX1-RV32-NEXT:    vsll.vi v13, v13, 24
-; LMULMAX1-RV32-NEXT:    vor.vv v11, v13, v11
-; LMULMAX1-RV32-NEXT:    vsll.vx v13, v9, a2
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT:    vsll.vx v9, v9, a3
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v13, v9
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v11
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vx v10, v8, a2
-; LMULMAX1-RV32-NEXT:    vsrl.vx v11, v8, a3
-; LMULMAX1-RV32-NEXT:    vand.vx v11, v11, a4
-; LMULMAX1-RV32-NEXT:    vor.vv v10, v11, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v11, v8, 24
-; LMULMAX1-RV32-NEXT:    vand.vx v11, v11, a5
-; LMULMAX1-RV32-NEXT:    vsrl.vi v13, v8, 8
-; LMULMAX1-RV32-NEXT:    vand.vv v13, v13, v12
-; LMULMAX1-RV32-NEXT:    vor.vv v11, v13, v11
-; LMULMAX1-RV32-NEXT:    vor.vv v10, v11, v10
-; LMULMAX1-RV32-NEXT:    vsll.vx v11, v8, a2
-; LMULMAX1-RV32-NEXT:    vand.vx v13, v8, a4
-; LMULMAX1-RV32-NEXT:    vsll.vx v13, v13, a3
-; LMULMAX1-RV32-NEXT:    vor.vv v11, v11, v13
-; LMULMAX1-RV32-NEXT:    vand.vx v13, v8, a5
-; LMULMAX1-RV32-NEXT:    vsll.vi v13, v13, 24
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v12
-; LMULMAX1-RV32-NEXT:    vsll.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v13, v8
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v11, v8
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    addi sp, sp, 16
-; LMULMAX1-RV32-NEXT:    ret
+; RV32-LABEL: bswap_v4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    sw zero, 12(sp)
+; RV32-NEXT:    lui a1, 1044480
+; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    li a1, 56
+; RV32-NEXT:    vsrl.vx v10, v8, a1
+; RV32-NEXT:    li a2, 40
+; RV32-NEXT:    vsrl.vx v12, v8, a2
+; RV32-NEXT:    lui a3, 16
+; RV32-NEXT:    addi a3, a3, -256
+; RV32-NEXT:    vand.vx v12, v12, a3
+; RV32-NEXT:    vor.vv v10, v12, v10
+; RV32-NEXT:    vsrl.vi v12, v8, 24
+; RV32-NEXT:    addi a4, sp, 8
+; RV32-NEXT:    vlse64.v v14, (a4), zero
+; RV32-NEXT:    lui a4, 4080
+; RV32-NEXT:    vand.vx v12, v12, a4
+; RV32-NEXT:    vsrl.vi v16, v8, 8
+; RV32-NEXT:    vand.vv v16, v16, v14
+; RV32-NEXT:    vor.vv v12, v16, v12
+; RV32-NEXT:    vor.vv v10, v12, v10
+; RV32-NEXT:    vsll.vx v12, v8, a1
+; RV32-NEXT:    vand.vx v16, v8, a3
+; RV32-NEXT:    vsll.vx v16, v16, a2
+; RV32-NEXT:    vor.vv v12, v12, v16
+; RV32-NEXT:    vand.vx v16, v8, a4
+; RV32-NEXT:    vsll.vi v16, v16, 24
+; RV32-NEXT:    vand.vv v8, v8, v14
+; RV32-NEXT:    vsll.vi v8, v8, 8
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    vor.vv v8, v12, v8
+; RV32-NEXT:    vor.vv v8, v8, v10
+; RV32-NEXT:    vse64.v v8, (a0)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
 ;
-; LMULMAX1-RV64-LABEL: bswap_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    li a2, 56
-; LMULMAX1-RV64-NEXT:    vsrl.vx v10, v8, a2
-; LMULMAX1-RV64-NEXT:    li a3, 40
-; LMULMAX1-RV64-NEXT:    vsrl.vx v11, v8, a3
-; LMULMAX1-RV64-NEXT:    lui a4, 16
-; LMULMAX1-RV64-NEXT:    addiw a4, a4, -256
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a4
-; LMULMAX1-RV64-NEXT:    vor.vv v10, v11, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v11, v8, 24
-; LMULMAX1-RV64-NEXT:    lui a5, 4080
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a5
-; LMULMAX1-RV64-NEXT:    vsrl.vi v12, v8, 8
-; LMULMAX1-RV64-NEXT:    li a6, 255
-; LMULMAX1-RV64-NEXT:    slli a6, a6, 24
-; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, a6
-; LMULMAX1-RV64-NEXT:    vor.vv v11, v12, v11
-; LMULMAX1-RV64-NEXT:    vor.vv v10, v11, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v8, a6
-; LMULMAX1-RV64-NEXT:    vsll.vi v11, v11, 8
-; LMULMAX1-RV64-NEXT:    vand.vx v12, v8, a5
-; LMULMAX1-RV64-NEXT:    vsll.vi v12, v12, 24
-; LMULMAX1-RV64-NEXT:    vor.vv v11, v12, v11
-; LMULMAX1-RV64-NEXT:    vsll.vx v12, v8, a2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT:    vsll.vx v8, v8, a3
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v12, v8
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vx v10, v9, a2
-; LMULMAX1-RV64-NEXT:    vsrl.vx v11, v9, a3
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a4
-; LMULMAX1-RV64-NEXT:    vor.vv v10, v11, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v11, v9, 24
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a5
-; LMULMAX1-RV64-NEXT:    vsrl.vi v12, v9, 8
-; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, a6
-; LMULMAX1-RV64-NEXT:    vor.vv v11, v12, v11
-; LMULMAX1-RV64-NEXT:    vor.vv v10, v11, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v9, a6
-; LMULMAX1-RV64-NEXT:    vsll.vi v11, v11, 8
-; LMULMAX1-RV64-NEXT:    vand.vx v12, v9, a5
-; LMULMAX1-RV64-NEXT:    vsll.vi v12, v12, 24
-; LMULMAX1-RV64-NEXT:    vor.vv v11, v12, v11
-; LMULMAX1-RV64-NEXT:    vsll.vx v12, v9, a2
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT:    vsll.vx v9, v9, a3
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v12, v9
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v11
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; RV64-LABEL: bswap_v4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    li a1, 56
+; RV64-NEXT:    vsrl.vx v10, v8, a1
+; RV64-NEXT:    li a2, 40
+; RV64-NEXT:    vsrl.vx v12, v8, a2
+; RV64-NEXT:    lui a3, 16
+; RV64-NEXT:    addiw a3, a3, -256
+; RV64-NEXT:    vand.vx v12, v12, a3
+; RV64-NEXT:    vor.vv v10, v12, v10
+; RV64-NEXT:    vsrl.vi v12, v8, 24
+; RV64-NEXT:    lui a4, 4080
+; RV64-NEXT:    vand.vx v12, v12, a4
+; RV64-NEXT:    vsrl.vi v14, v8, 8
+; RV64-NEXT:    li a5, 255
+; RV64-NEXT:    slli a5, a5, 24
+; RV64-NEXT:    vand.vx v14, v14, a5
+; RV64-NEXT:    vor.vv v12, v14, v12
+; RV64-NEXT:    vor.vv v10, v12, v10
+; RV64-NEXT:    vand.vx v12, v8, a5
+; RV64-NEXT:    vsll.vi v12, v12, 8
+; RV64-NEXT:    vand.vx v14, v8, a4
+; RV64-NEXT:    vsll.vi v14, v14, 24
+; RV64-NEXT:    vor.vv v12, v14, v12
+; RV64-NEXT:    vsll.vx v14, v8, a1
+; RV64-NEXT:    vand.vx v8, v8, a3
+; RV64-NEXT:    vsll.vx v8, v8, a2
+; RV64-NEXT:    vor.vv v8, v14, v8
+; RV64-NEXT:    vor.vv v8, v8, v12
+; RV64-NEXT:    vor.vv v8, v8, v10
+; RV64-NEXT:    vse64.v v8, (a0)
+; RV64-NEXT:    ret
 ;
 ; ZVKB-LABEL: bswap_v4i64:
 ; ZVKB:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
index 9ec15e5710f72d..63cd42e97ef6f1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
@@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define fastcc <4 x i8> @ret_v4i8(ptr %p) {
 ; CHECK-LABEL: ret_v4i8:
@@ -33,19 +32,11 @@ define fastcc <8 x i32> @ret_v8i32(ptr %p) {
 }
 
 define fastcc <16 x i64> @ret_v16i64(ptr %p) {
-; LMULMAX8-LABEL: ret_v16i64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; LMULMAX8-NEXT:    vle64.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: ret_v16i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; LMULMAX4-NEXT:    vle64.v v8, (a0)
-; LMULMAX4-NEXT:    addi a0, a0, 64
-; LMULMAX4-NEXT:    vle64.v v12, (a0)
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: ret_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    ret
   %v = load <16 x i64>, ptr %p
   ret <16 x i64> %v
 }
@@ -73,86 +64,39 @@ define fastcc <32 x i1> @ret_mask_v32i1(ptr %p) {
 
 ; Return the vector via registers v8-v23
 define fastcc <64 x i32> @ret_split_v64i32(ptr %x) {
-; LMULMAX8-LABEL: ret_split_v64i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a1, 32
-; LMULMAX8-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    addi a0, a0, 128
-; LMULMAX8-NEXT:    vle32.v v16, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: ret_split_v64i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vle32.v v8, (a0)
-; LMULMAX4-NEXT:    addi a1, a0, 64
-; LMULMAX4-NEXT:    vle32.v v12, (a1)
-; LMULMAX4-NEXT:    addi a1, a0, 128
-; LMULMAX4-NEXT:    vle32.v v16, (a1)
-; LMULMAX4-NEXT:    addi a0, a0, 192
-; LMULMAX4-NEXT:    vle32.v v20, (a0)
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: ret_split_v64i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vle32.v v16, (a0)
+; CHECK-NEXT:    ret
   %v = load <64 x i32>, ptr %x
   ret <64 x i32> %v
 }
 
 ; Return the vector fully via the stack
 define fastcc <128 x i32> @ret_split_v128i32(ptr %x) {
-; LMULMAX8-LABEL: ret_split_v128i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    addi a2, a1, 128
-; LMULMAX8-NEXT:    li a3, 32
-; LMULMAX8-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a2)
-; LMULMAX8-NEXT:    addi a2, a1, 256
-; LMULMAX8-NEXT:    vle32.v v16, (a1)
-; LMULMAX8-NEXT:    addi a1, a1, 384
-; LMULMAX8-NEXT:    vle32.v v24, (a1)
-; LMULMAX8-NEXT:    vle32.v v0, (a2)
-; LMULMAX8-NEXT:    vse32.v v16, (a0)
-; LMULMAX8-NEXT:    addi a1, a0, 384
-; LMULMAX8-NEXT:    vse32.v v24, (a1)
-; LMULMAX8-NEXT:    addi a1, a0, 256
-; LMULMAX8-NEXT:    vse32.v v0, (a1)
-; LMULMAX8-NEXT:    addi a0, a0, 128
-; LMULMAX8-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: ret_split_v128i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    addi a2, a1, 64
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vle32.v v8, (a2)
-; LMULMAX4-NEXT:    addi a2, a1, 128
-; LMULMAX4-NEXT:    vle32.v v12, (a2)
-; LMULMAX4-NEXT:    addi a2, a1, 192
-; LMULMAX4-NEXT:    vle32.v v16, (a2)
-; LMULMAX4-NEXT:    addi a2, a1, 256
-; LMULMAX4-NEXT:    vle32.v v20, (a2)
-; LMULMAX4-NEXT:    addi a2, a1, 320
-; LMULMAX4-NEXT:    vle32.v v24, (a2)
-; LMULMAX4-NEXT:    addi a2, a1, 384
-; LMULMAX4-NEXT:    vle32.v v28, (a1)
-; LMULMAX4-NEXT:    addi a1, a1, 448
-; LMULMAX4-NEXT:    vle32.v v0, (a1)
-; LMULMAX4-NEXT:    vle32.v v4, (a2)
-; LMULMAX4-NEXT:    vse32.v v28, (a0)
-; LMULMAX4-NEXT:    addi a1, a0, 448
-; LMULMAX4-NEXT:    vse32.v v0, (a1)
-; LMULMAX4-NEXT:    addi a1, a0, 384
-; LMULMAX4-NEXT:    vse32.v v4, (a1)
-; LMULMAX4-NEXT:    addi a1, a0, 320
-; LMULMAX4-NEXT:    vse32.v v24, (a1)
-; LMULMAX4-NEXT:    addi a1, a0, 256
-; LMULMAX4-NEXT:    vse32.v v20, (a1)
-; LMULMAX4-NEXT:    addi a1, a0, 192
-; LMULMAX4-NEXT:    vse32.v v16, (a1)
-; LMULMAX4-NEXT:    addi a1, a0, 128
-; LMULMAX4-NEXT:    vse32.v v12, (a1)
-; LMULMAX4-NEXT:    addi a0, a0, 64
-; LMULMAX4-NEXT:    vse32.v v8, (a0)
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: ret_split_v128i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, a1, 128
+; CHECK-NEXT:    li a3, 32
+; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a2)
+; CHECK-NEXT:    addi a2, a1, 256
+; CHECK-NEXT:    vle32.v v16, (a1)
+; CHECK-NEXT:    addi a1, a1, 384
+; CHECK-NEXT:    vle32.v v24, (a1)
+; CHECK-NEXT:    vle32.v v0, (a2)
+; CHECK-NEXT:    vse32.v v16, (a0)
+; CHECK-NEXT:    addi a1, a0, 384
+; CHECK-NEXT:    vse32.v v24, (a1)
+; CHECK-NEXT:    addi a1, a0, 256
+; CHECK-NEXT:    vse32.v v0, (a1)
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %v = load <128 x i32>, ptr %x
   ret <128 x i32> %v
 }
@@ -209,29 +153,15 @@ define fastcc <32 x i1> @ret_v32i1_param_v32i1_v32i1(<32 x i1> %v, <32 x i1> %w)
 }
 
 define fastcc <32 x i32> @ret_v32i32_param_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %w) {
-; LMULMAX8-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a2, 32
-; LMULMAX8-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; LMULMAX8-NEXT:    vle32.v v24, (a0)
-; LMULMAX8-NEXT:    vadd.vv v8, v8, v16
-; LMULMAX8-NEXT:    vadd.vv v8, v8, v24
-; LMULMAX8-NEXT:    vadd.vx v8, v8, a1
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    addi a1, a0, 64
-; LMULMAX4-NEXT:    vle32.v v24, (a1)
-; LMULMAX4-NEXT:    vle32.v v28, (a0)
-; LMULMAX4-NEXT:    vadd.vv v8, v8, v16
-; LMULMAX4-NEXT:    vadd.vv v12, v12, v20
-; LMULMAX4-NEXT:    vadd.vv v12, v12, v24
-; LMULMAX4-NEXT:    vadd.vv v8, v8, v28
-; LMULMAX4-NEXT:    vadd.vx v8, v8, a2
-; LMULMAX4-NEXT:    vadd.vx v12, v12, a2
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT:    vle32.v v24, (a0)
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    vadd.vx v8, v8, a1
+; CHECK-NEXT:    ret
   %r = add <32 x i32> %x, %y
   %s = add <32 x i32> %r, %z
   %head = insertelement <32 x i32> poison, i32 %w, i32 0
@@ -244,98 +174,50 @@ declare <32 x i32> @ext2(<32 x i32>, <32 x i32>, i32, i32)
 declare <32 x i32> @ext3(<32 x i32>, <32 x i32>, <32 x i32>, i32, i32)
 
 define fastcc <32 x i32> @ret_v32i32_call_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, i32 %w) {
-; LMULMAX8-LABEL: ret_v32i32_call_v32i32_v32i32_i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    addi sp, sp, -16
-; LMULMAX8-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX8-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; LMULMAX8-NEXT:    .cfi_offset ra, -8
-; LMULMAX8-NEXT:    vmv8r.v v24, v8
-; LMULMAX8-NEXT:    li a1, 2
-; LMULMAX8-NEXT:    vmv8r.v v8, v16
-; LMULMAX8-NEXT:    vmv8r.v v16, v24
-; LMULMAX8-NEXT:    call ext2
-; LMULMAX8-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; LMULMAX8-NEXT:    addi sp, sp, 16
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: ret_v32i32_call_v32i32_v32i32_i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    addi sp, sp, -16
-; LMULMAX4-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX4-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; LMULMAX4-NEXT:    .cfi_offset ra, -8
-; LMULMAX4-NEXT:    vmv4r.v v24, v12
-; LMULMAX4-NEXT:    vmv4r.v v28, v8
-; LMULMAX4-NEXT:    li a1, 2
-; LMULMAX4-NEXT:    vmv4r.v v8, v16
-; LMULMAX4-NEXT:    vmv4r.v v12, v20
-; LMULMAX4-NEXT:    vmv4r.v v16, v28
-; LMULMAX4-NEXT:    vmv4r.v v20, v24
-; LMULMAX4-NEXT:    call ext2
-; LMULMAX4-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; LMULMAX4-NEXT:    addi sp, sp, 16
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: ret_v32i32_call_v32i32_v32i32_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset ra, -8
+; CHECK-NEXT:    vmv8r.v v24, v8
+; CHECK-NEXT:    li a1, 2
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    vmv8r.v v16, v24
+; CHECK-NEXT:    call ext2
+; CHECK-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
   %t = call fastcc <32 x i32> @ext2(<32 x i32> %y, <32 x i32> %x, i32 %w, i32 2)
   ret <32 x i32> %t
 }
 
 define fastcc <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %w) {
-; LMULMAX8-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    addi sp, sp, -256
-; LMULMAX8-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX8-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX8-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
-; LMULMAX8-NEXT:    .cfi_offset ra, -8
-; LMULMAX8-NEXT:    .cfi_offset s0, -16
-; LMULMAX8-NEXT:    addi s0, sp, 256
-; LMULMAX8-NEXT:    .cfi_def_cfa s0, 0
-; LMULMAX8-NEXT:    andi sp, sp, -128
-; LMULMAX8-NEXT:    li a2, 32
-; LMULMAX8-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; LMULMAX8-NEXT:    vle32.v v24, (a0)
-; LMULMAX8-NEXT:    mv a3, sp
-; LMULMAX8-NEXT:    mv a0, sp
-; LMULMAX8-NEXT:    li a2, 42
-; LMULMAX8-NEXT:    vse32.v v8, (a3)
-; LMULMAX8-NEXT:    vmv.v.v v8, v24
-; LMULMAX8-NEXT:    call ext3
-; LMULMAX8-NEXT:    addi sp, s0, -256
-; LMULMAX8-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX8-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX8-NEXT:    addi sp, sp, 256
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    addi sp, sp, -256
-; LMULMAX4-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX4-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX4-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
-; LMULMAX4-NEXT:    .cfi_offset ra, -8
-; LMULMAX4-NEXT:    .cfi_offset s0, -16
-; LMULMAX4-NEXT:    addi s0, sp, 256
-; LMULMAX4-NEXT:    .cfi_def_cfa s0, 0
-; LMULMAX4-NEXT:    andi sp, sp, -128
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vle32.v v24, (a0)
-; LMULMAX4-NEXT:    addi a0, a0, 64
-; LMULMAX4-NEXT:    vle32.v v28, (a0)
-; LMULMAX4-NEXT:    addi a0, sp, 64
-; LMULMAX4-NEXT:    vse32.v v12, (a0)
-; LMULMAX4-NEXT:    mv a1, sp
-; LMULMAX4-NEXT:    mv a0, sp
-; LMULMAX4-NEXT:    li a3, 42
-; LMULMAX4-NEXT:    vse32.v v8, (a1)
-; LMULMAX4-NEXT:    vmv.v.v v8, v24
-; LMULMAX4-NEXT:    vmv.v.v v12, v28
-; LMULMAX4-NEXT:    call ext3
-; LMULMAX4-NEXT:    addi sp, s0, -256
-; LMULMAX4-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX4-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX4-NEXT:    addi sp, sp, 256
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -256
+; CHECK-NEXT:    .cfi_def_cfa_offset 256
+; CHECK-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset ra, -8
+; CHECK-NEXT:    .cfi_offset s0, -16
+; CHECK-NEXT:    addi s0, sp, 256
+; CHECK-NEXT:    .cfi_def_cfa s0, 0
+; CHECK-NEXT:    andi sp, sp, -128
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT:    vle32.v v24, (a0)
+; CHECK-NEXT:    mv a3, sp
+; CHECK-NEXT:    mv a0, sp
+; CHECK-NEXT:    li a2, 42
+; CHECK-NEXT:    vse32.v v8, (a3)
+; CHECK-NEXT:    vmv.v.v v8, v24
+; CHECK-NEXT:    call ext3
+; CHECK-NEXT:    addi sp, s0, -256
+; CHECK-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 256
+; CHECK-NEXT:    ret
   %t = call fastcc <32 x i32> @ext3(<32 x i32> %z, <32 x i32> %y, <32 x i32> %x, i32 %w, i32 42)
   ret <32 x i32> %t
 }
@@ -344,127 +226,67 @@ define fastcc <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x
 ; stack, but with fastcc can pass indirectly with the extra GPR registers
 ; allowed.
 define fastcc <32 x i32> @vector_arg_indirect_stack(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, <32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %8) {
-; LMULMAX8-LABEL: vector_arg_indirect_stack:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 32
-; LMULMAX8-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; LMULMAX8-NEXT:    vle32.v v16, (t2)
-; LMULMAX8-NEXT:    vadd.vv v8, v8, v16
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: vector_arg_indirect_stack:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    addi a0, t2, 64
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vle32.v v16, (t2)
-; LMULMAX4-NEXT:    vle32.v v20, (a0)
-; LMULMAX4-NEXT:    vadd.vv v8, v8, v16
-; LMULMAX4-NEXT:    vadd.vv v12, v12, v20
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: vector_arg_indirect_stack:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vle32.v v16, (t2)
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    ret
   %s = add <32 x i32> %x, %z
   ret <32 x i32> %s
 }
 
 ; Calling the function above. Ensure we pass the arguments correctly.
 define fastcc <32 x i32> @pass_vector_arg_indirect_stack(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z) {
-; LMULMAX8-LABEL: pass_vector_arg_indirect_stack:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    addi sp, sp, -256
-; LMULMAX8-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX8-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX8-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
-; LMULMAX8-NEXT:    .cfi_offset ra, -8
-; LMULMAX8-NEXT:    .cfi_offset s0, -16
-; LMULMAX8-NEXT:    addi s0, sp, 256
-; LMULMAX8-NEXT:    .cfi_def_cfa s0, 0
-; LMULMAX8-NEXT:    andi sp, sp, -128
-; LMULMAX8-NEXT:    li a0, 32
-; LMULMAX8-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; LMULMAX8-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-NEXT:    mv a0, sp
-; LMULMAX8-NEXT:    li a1, 1
-; LMULMAX8-NEXT:    li a2, 2
-; LMULMAX8-NEXT:    li a3, 3
-; LMULMAX8-NEXT:    li a4, 4
-; LMULMAX8-NEXT:    li a5, 5
-; LMULMAX8-NEXT:    li a6, 6
-; LMULMAX8-NEXT:    li a7, 7
-; LMULMAX8-NEXT:    mv t2, sp
-; LMULMAX8-NEXT:    li t3, 8
-; LMULMAX8-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-NEXT:    li a0, 0
-; LMULMAX8-NEXT:    vmv.v.i v16, 0
-; LMULMAX8-NEXT:    call vector_arg_indirect_stack
-; LMULMAX8-NEXT:    addi sp, s0, -256
-; LMULMAX8-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX8-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX8-NEXT:    addi sp, sp, 256
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: pass_vector_arg_indirect_stack:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    addi sp, sp, -256
-; LMULMAX4-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX4-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX4-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
-; LMULMAX4-NEXT:    .cfi_offset ra, -8
-; LMULMAX4-NEXT:    .cfi_offset s0, -16
-; LMULMAX4-NEXT:    addi s0, sp, 256
-; LMULMAX4-NEXT:    .cfi_def_cfa s0, 0
-; LMULMAX4-NEXT:    andi sp, sp, -128
-; LMULMAX4-NEXT:    addi a0, sp, 64
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vmv.v.i v8, 0
-; LMULMAX4-NEXT:    vse32.v v8, (a0)
-; LMULMAX4-NEXT:    mv a0, sp
-; LMULMAX4-NEXT:    li a1, 1
-; LMULMAX4-NEXT:    li a2, 2
-; LMULMAX4-NEXT:    li a3, 3
-; LMULMAX4-NEXT:    li a4, 4
-; LMULMAX4-NEXT:    li a5, 5
-; LMULMAX4-NEXT:    li a6, 6
-; LMULMAX4-NEXT:    li a7, 7
-; LMULMAX4-NEXT:    mv t2, sp
-; LMULMAX4-NEXT:    li t4, 8
-; LMULMAX4-NEXT:    vse32.v v8, (a0)
-; LMULMAX4-NEXT:    li a0, 0
-; LMULMAX4-NEXT:    vmv.v.i v12, 0
-; LMULMAX4-NEXT:    vmv.v.i v16, 0
-; LMULMAX4-NEXT:    vmv.v.i v20, 0
-; LMULMAX4-NEXT:    call vector_arg_indirect_stack
-; LMULMAX4-NEXT:    addi sp, s0, -256
-; LMULMAX4-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX4-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX4-NEXT:    addi sp, sp, 256
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: pass_vector_arg_indirect_stack:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -256
+; CHECK-NEXT:    .cfi_def_cfa_offset 256
+; CHECK-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset ra, -8
+; CHECK-NEXT:    .cfi_offset s0, -16
+; CHECK-NEXT:    addi s0, sp, 256
+; CHECK-NEXT:    .cfi_def_cfa s0, 0
+; CHECK-NEXT:    andi sp, sp, -128
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    mv a0, sp
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    li a2, 2
+; CHECK-NEXT:    li a3, 3
+; CHECK-NEXT:    li a4, 4
+; CHECK-NEXT:    li a5, 5
+; CHECK-NEXT:    li a6, 6
+; CHECK-NEXT:    li a7, 7
+; CHECK-NEXT:    mv t2, sp
+; CHECK-NEXT:    li t3, 8
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    li a0, 0
+; CHECK-NEXT:    vmv.v.i v16, 0
+; CHECK-NEXT:    call vector_arg_indirect_stack
+; CHECK-NEXT:    addi sp, s0, -256
+; CHECK-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 256
+; CHECK-NEXT:    ret
   %s = call fastcc <32 x i32> @vector_arg_indirect_stack(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, i32 8)
   ret <32 x i32> %s
 }
 
 ; A pathological test case where even with fastcc we must use the stack for arguments %13 and %z
 define fastcc <32 x i32> @vector_arg_direct_stack(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, <32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %last) {
-; LMULMAX8-LABEL: vector_arg_direct_stack:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 32
-; LMULMAX8-NEXT:    addi a1, sp, 8
-; LMULMAX8-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; LMULMAX8-NEXT:    vle32.v v24, (a1)
-; LMULMAX8-NEXT:    vadd.vv v8, v8, v16
-; LMULMAX8-NEXT:    vadd.vv v8, v8, v24
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: vector_arg_direct_stack:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    addi a0, sp, 8
-; LMULMAX4-NEXT:    vle32.v v24, (a0)
-; LMULMAX4-NEXT:    addi a0, sp, 72
-; LMULMAX4-NEXT:    vle32.v v28, (a0)
-; LMULMAX4-NEXT:    vadd.vv v12, v12, v20
-; LMULMAX4-NEXT:    vadd.vv v8, v8, v16
-; LMULMAX4-NEXT:    vadd.vv v8, v8, v24
-; LMULMAX4-NEXT:    vadd.vv v12, v12, v28
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: vector_arg_direct_stack:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    addi a1, sp, 8
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vle32.v v24, (a1)
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    ret
   %s = add <32 x i32> %x, %y
   %t = add <32 x i32> %s, %z
   ret <32 x i32> %t
@@ -472,76 +294,39 @@ define fastcc <32 x i32> @vector_arg_direct_stack(i32 %0, i32 %1, i32 %2, i32 %3
 
 ; Calling the function above. Ensure we pass the arguments correctly.
 define fastcc <32 x i32> @pass_vector_arg_direct_stack(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z) {
-; LMULMAX8-LABEL: pass_vector_arg_direct_stack:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    addi sp, sp, -160
-; LMULMAX8-NEXT:    .cfi_def_cfa_offset 160
-; LMULMAX8-NEXT:    sd ra, 152(sp) # 8-byte Folded Spill
-; LMULMAX8-NEXT:    .cfi_offset ra, -8
-; LMULMAX8-NEXT:    li a0, 32
-; LMULMAX8-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; LMULMAX8-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-NEXT:    addi a0, sp, 8
-; LMULMAX8-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-NEXT:    li a0, 1
-; LMULMAX8-NEXT:    sd a0, 136(sp)
-; LMULMAX8-NEXT:    li a0, 13
-; LMULMAX8-NEXT:    li a1, 1
-; LMULMAX8-NEXT:    li a2, 2
-; LMULMAX8-NEXT:    li a3, 3
-; LMULMAX8-NEXT:    li a4, 4
-; LMULMAX8-NEXT:    li a5, 5
-; LMULMAX8-NEXT:    li a6, 6
-; LMULMAX8-NEXT:    li a7, 7
-; LMULMAX8-NEXT:    li t2, 8
-; LMULMAX8-NEXT:    li t3, 9
-; LMULMAX8-NEXT:    li t4, 10
-; LMULMAX8-NEXT:    li t5, 11
-; LMULMAX8-NEXT:    li t6, 12
-; LMULMAX8-NEXT:    sd a0, 0(sp)
-; LMULMAX8-NEXT:    li a0, 0
-; LMULMAX8-NEXT:    vmv.v.i v16, 0
-; LMULMAX8-NEXT:    call vector_arg_direct_stack
-; LMULMAX8-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
-; LMULMAX8-NEXT:    addi sp, sp, 160
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: pass_vector_arg_direct_stack:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    addi sp, sp, -160
-; LMULMAX4-NEXT:    .cfi_def_cfa_offset 160
-; LMULMAX4-NEXT:    sd ra, 152(sp) # 8-byte Folded Spill
-; LMULMAX4-NEXT:    .cfi_offset ra, -8
-; LMULMAX4-NEXT:    li a0, 1
-; LMULMAX4-NEXT:    sd a0, 136(sp)
-; LMULMAX4-NEXT:    li a0, 13
-; LMULMAX4-NEXT:    sd a0, 0(sp)
-; LMULMAX4-NEXT:    addi a0, sp, 72
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vmv.v.i v8, 0
-; LMULMAX4-NEXT:    vse32.v v8, (a0)
-; LMULMAX4-NEXT:    addi a0, sp, 8
-; LMULMAX4-NEXT:    li a1, 1
-; LMULMAX4-NEXT:    li a2, 2
-; LMULMAX4-NEXT:    li a3, 3
-; LMULMAX4-NEXT:    li a4, 4
-; LMULMAX4-NEXT:    li a5, 5
-; LMULMAX4-NEXT:    li a6, 6
-; LMULMAX4-NEXT:    li a7, 7
-; LMULMAX4-NEXT:    li t2, 8
-; LMULMAX4-NEXT:    li t3, 9
-; LMULMAX4-NEXT:    li t4, 10
-; LMULMAX4-NEXT:    li t5, 11
-; LMULMAX4-NEXT:    li t6, 12
-; LMULMAX4-NEXT:    vse32.v v8, (a0)
-; LMULMAX4-NEXT:    li a0, 0
-; LMULMAX4-NEXT:    vmv.v.i v12, 0
-; LMULMAX4-NEXT:    vmv.v.i v16, 0
-; LMULMAX4-NEXT:    vmv.v.i v20, 0
-; LMULMAX4-NEXT:    call vector_arg_direct_stack
-; LMULMAX4-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
-; LMULMAX4-NEXT:    addi sp, sp, 160
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: pass_vector_arg_direct_stack:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -160
+; CHECK-NEXT:    .cfi_def_cfa_offset 160
+; CHECK-NEXT:    sd ra, 152(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset ra, -8
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    sd a0, 136(sp)
+; CHECK-NEXT:    li a0, 13
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    li a2, 2
+; CHECK-NEXT:    li a3, 3
+; CHECK-NEXT:    li a4, 4
+; CHECK-NEXT:    li a5, 5
+; CHECK-NEXT:    li a6, 6
+; CHECK-NEXT:    li a7, 7
+; CHECK-NEXT:    li t2, 8
+; CHECK-NEXT:    li t3, 9
+; CHECK-NEXT:    li t4, 10
+; CHECK-NEXT:    li t5, 11
+; CHECK-NEXT:    li t6, 12
+; CHECK-NEXT:    sd a0, 0(sp)
+; CHECK-NEXT:    li a0, 0
+; CHECK-NEXT:    vmv.v.i v16, 0
+; CHECK-NEXT:    call vector_arg_direct_stack
+; CHECK-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 160
+; CHECK-NEXT:    ret
   %s = call fastcc <32 x i32> @vector_arg_direct_stack(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, i32 1)
   ret <32 x i32> %s
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
index 8e3a432b8ac834..3286c336a0fd11 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
@@ -1,8 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
 
 define <4 x i8> @ret_v4i8(ptr %p) {
 ; CHECK-LABEL: ret_v4i8:
@@ -25,81 +22,21 @@ define <4 x i32> @ret_v4i32(ptr %p) {
 }
 
 define <8 x i32> @ret_v8i32(ptr %p) {
-; LMULMAX8-LABEL: ret_v8i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: ret_v8i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX4-NEXT:    vle32.v v8, (a0)
-; LMULMAX4-NEXT:    ret
-;
-; LMULMAX2-LABEL: ret_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ret_v8i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ret_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    ret
   %v = load <8 x i32>, ptr %p
   ret <8 x i32> %v
 }
 
 define <16 x i64> @ret_v16i64(ptr %p) {
-; LMULMAX8-LABEL: ret_v16i64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; LMULMAX8-NEXT:    vle64.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: ret_v16i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; LMULMAX4-NEXT:    vle64.v v8, (a0)
-; LMULMAX4-NEXT:    addi a0, a0, 64
-; LMULMAX4-NEXT:    vle64.v v12, (a0)
-; LMULMAX4-NEXT:    ret
-;
-; LMULMAX2-LABEL: ret_v16i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    addi a1, a0, 32
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 64
-; LMULMAX2-NEXT:    vle64.v v12, (a1)
-; LMULMAX2-NEXT:    addi a0, a0, 96
-; LMULMAX2-NEXT:    vle64.v v14, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ret_v16i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle64.v v9, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 32
-; LMULMAX1-NEXT:    vle64.v v10, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 48
-; LMULMAX1-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 64
-; LMULMAX1-NEXT:    vle64.v v12, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 80
-; LMULMAX1-NEXT:    vle64.v v13, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 96
-; LMULMAX1-NEXT:    vle64.v v14, (a1)
-; LMULMAX1-NEXT:    addi a0, a0, 112
-; LMULMAX1-NEXT:    vle64.v v15, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ret_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    ret
   %v = load <16 x i64>, ptr %p
   ret <16 x i64> %v
 }
@@ -115,374 +52,51 @@ define <8 x i1> @ret_mask_v8i1(ptr %p) {
 }
 
 define <32 x i1> @ret_mask_v32i1(ptr %p) {
-; LMULMAX8-LABEL: ret_mask_v32i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a1, 32
-; LMULMAX8-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX8-NEXT:    vlm.v v0, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: ret_mask_v32i1:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    li a1, 32
-; LMULMAX4-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX4-NEXT:    vlm.v v0, (a0)
-; LMULMAX4-NEXT:    ret
-;
-; LMULMAX2-LABEL: ret_mask_v32i1:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a1, 32
-; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vlm.v v0, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ret_mask_v32i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vlm.v v0, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 2
-; LMULMAX1-NEXT:    vlm.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ret_mask_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vlm.v v0, (a0)
+; CHECK-NEXT:    ret
   %v = load <32 x i1>, ptr %p
   ret <32 x i1> %v
 }
 
 ; Return the vector via registers v8-v23
 define <64 x i32> @ret_split_v64i32(ptr %x) {
-; LMULMAX8-LABEL: ret_split_v64i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a1, 32
-; LMULMAX8-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    addi a0, a0, 128
-; LMULMAX8-NEXT:    vle32.v v16, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: ret_split_v64i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vle32.v v8, (a0)
-; LMULMAX4-NEXT:    addi a1, a0, 64
-; LMULMAX4-NEXT:    vle32.v v12, (a1)
-; LMULMAX4-NEXT:    addi a1, a0, 128
-; LMULMAX4-NEXT:    vle32.v v16, (a1)
-; LMULMAX4-NEXT:    addi a0, a0, 192
-; LMULMAX4-NEXT:    vle32.v v20, (a0)
-; LMULMAX4-NEXT:    ret
-;
-; LMULMAX2-LABEL: ret_split_v64i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    addi a1, a0, 32
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 64
-; LMULMAX2-NEXT:    vle32.v v12, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 96
-; LMULMAX2-NEXT:    vle32.v v14, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 128
-; LMULMAX2-NEXT:    vle32.v v16, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 160
-; LMULMAX2-NEXT:    vle32.v v18, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 192
-; LMULMAX2-NEXT:    vle32.v v20, (a1)
-; LMULMAX2-NEXT:    addi a0, a0, 224
-; LMULMAX2-NEXT:    vle32.v v22, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ret_split_v64i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle32.v v9, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 32
-; LMULMAX1-NEXT:    vle32.v v10, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 48
-; LMULMAX1-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 64
-; LMULMAX1-NEXT:    vle32.v v12, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 80
-; LMULMAX1-NEXT:    vle32.v v13, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 96
-; LMULMAX1-NEXT:    vle32.v v14, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 112
-; LMULMAX1-NEXT:    vle32.v v15, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 128
-; LMULMAX1-NEXT:    vle32.v v16, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 144
-; LMULMAX1-NEXT:    vle32.v v17, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 160
-; LMULMAX1-NEXT:    vle32.v v18, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 176
-; LMULMAX1-NEXT:    vle32.v v19, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 192
-; LMULMAX1-NEXT:    vle32.v v20, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 208
-; LMULMAX1-NEXT:    vle32.v v21, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 224
-; LMULMAX1-NEXT:    vle32.v v22, (a1)
-; LMULMAX1-NEXT:    addi a0, a0, 240
-; LMULMAX1-NEXT:    vle32.v v23, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ret_split_v64i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vle32.v v16, (a0)
+; CHECK-NEXT:    ret
   %v = load <64 x i32>, ptr %x
   ret <64 x i32> %v
 }
 
 ; Return the vector fully via the stack
 define <128 x i32> @ret_split_v128i32(ptr %x) {
-; LMULMAX8-LABEL: ret_split_v128i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    addi a2, a1, 128
-; LMULMAX8-NEXT:    li a3, 32
-; LMULMAX8-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a2)
-; LMULMAX8-NEXT:    addi a2, a1, 256
-; LMULMAX8-NEXT:    vle32.v v16, (a1)
-; LMULMAX8-NEXT:    addi a1, a1, 384
-; LMULMAX8-NEXT:    vle32.v v24, (a1)
-; LMULMAX8-NEXT:    vle32.v v0, (a2)
-; LMULMAX8-NEXT:    vse32.v v16, (a0)
-; LMULMAX8-NEXT:    addi a1, a0, 384
-; LMULMAX8-NEXT:    vse32.v v24, (a1)
-; LMULMAX8-NEXT:    addi a1, a0, 256
-; LMULMAX8-NEXT:    vse32.v v0, (a1)
-; LMULMAX8-NEXT:    addi a0, a0, 128
-; LMULMAX8-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: ret_split_v128i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    addi a2, a1, 64
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vle32.v v8, (a2)
-; LMULMAX4-NEXT:    addi a2, a1, 128
-; LMULMAX4-NEXT:    vle32.v v12, (a2)
-; LMULMAX4-NEXT:    addi a2, a1, 192
-; LMULMAX4-NEXT:    vle32.v v16, (a2)
-; LMULMAX4-NEXT:    addi a2, a1, 256
-; LMULMAX4-NEXT:    vle32.v v20, (a2)
-; LMULMAX4-NEXT:    addi a2, a1, 320
-; LMULMAX4-NEXT:    vle32.v v24, (a2)
-; LMULMAX4-NEXT:    addi a2, a1, 384
-; LMULMAX4-NEXT:    vle32.v v28, (a1)
-; LMULMAX4-NEXT:    addi a1, a1, 448
-; LMULMAX4-NEXT:    vle32.v v0, (a1)
-; LMULMAX4-NEXT:    vle32.v v4, (a2)
-; LMULMAX4-NEXT:    vse32.v v28, (a0)
-; LMULMAX4-NEXT:    addi a1, a0, 448
-; LMULMAX4-NEXT:    vse32.v v0, (a1)
-; LMULMAX4-NEXT:    addi a1, a0, 384
-; LMULMAX4-NEXT:    vse32.v v4, (a1)
-; LMULMAX4-NEXT:    addi a1, a0, 320
-; LMULMAX4-NEXT:    vse32.v v24, (a1)
-; LMULMAX4-NEXT:    addi a1, a0, 256
-; LMULMAX4-NEXT:    vse32.v v20, (a1)
-; LMULMAX4-NEXT:    addi a1, a0, 192
-; LMULMAX4-NEXT:    vse32.v v16, (a1)
-; LMULMAX4-NEXT:    addi a1, a0, 128
-; LMULMAX4-NEXT:    vse32.v v12, (a1)
-; LMULMAX4-NEXT:    addi a0, a0, 64
-; LMULMAX4-NEXT:    vse32.v v8, (a0)
-; LMULMAX4-NEXT:    ret
-;
-; LMULMAX2-LABEL: ret_split_v128i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    addi a2, a1, 32
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a2)
-; LMULMAX2-NEXT:    addi a2, a1, 64
-; LMULMAX2-NEXT:    vle32.v v10, (a2)
-; LMULMAX2-NEXT:    addi a2, a1, 96
-; LMULMAX2-NEXT:    vle32.v v12, (a2)
-; LMULMAX2-NEXT:    addi a2, a1, 128
-; LMULMAX2-NEXT:    vle32.v v14, (a2)
-; LMULMAX2-NEXT:    addi a2, a1, 160
-; LMULMAX2-NEXT:    vle32.v v16, (a2)
-; LMULMAX2-NEXT:    addi a2, a1, 192
-; LMULMAX2-NEXT:    vle32.v v18, (a2)
-; LMULMAX2-NEXT:    addi a2, a1, 224
-; LMULMAX2-NEXT:    vle32.v v20, (a2)
-; LMULMAX2-NEXT:    addi a2, a1, 256
-; LMULMAX2-NEXT:    vle32.v v22, (a2)
-; LMULMAX2-NEXT:    addi a2, a1, 288
-; LMULMAX2-NEXT:    vle32.v v24, (a2)
-; LMULMAX2-NEXT:    addi a2, a1, 320
-; LMULMAX2-NEXT:    vle32.v v26, (a2)
-; LMULMAX2-NEXT:    addi a2, a1, 352
-; LMULMAX2-NEXT:    vle32.v v28, (a2)
-; LMULMAX2-NEXT:    addi a2, a1, 384
-; LMULMAX2-NEXT:    vle32.v v30, (a2)
-; LMULMAX2-NEXT:    addi a2, a1, 416
-; LMULMAX2-NEXT:    vle32.v v0, (a2)
-; LMULMAX2-NEXT:    addi a2, a1, 448
-; LMULMAX2-NEXT:    vle32.v v2, (a1)
-; LMULMAX2-NEXT:    addi a1, a1, 480
-; LMULMAX2-NEXT:    vle32.v v4, (a1)
-; LMULMAX2-NEXT:    vle32.v v6, (a2)
-; LMULMAX2-NEXT:    vse32.v v2, (a0)
-; LMULMAX2-NEXT:    addi a1, a0, 480
-; LMULMAX2-NEXT:    vse32.v v4, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 448
-; LMULMAX2-NEXT:    vse32.v v6, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 416
-; LMULMAX2-NEXT:    vse32.v v0, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 384
-; LMULMAX2-NEXT:    vse32.v v30, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 352
-; LMULMAX2-NEXT:    vse32.v v28, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 320
-; LMULMAX2-NEXT:    vse32.v v26, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 288
-; LMULMAX2-NEXT:    vse32.v v24, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 256
-; LMULMAX2-NEXT:    vse32.v v22, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 224
-; LMULMAX2-NEXT:    vse32.v v20, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 192
-; LMULMAX2-NEXT:    vse32.v v18, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 160
-; LMULMAX2-NEXT:    vse32.v v16, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 128
-; LMULMAX2-NEXT:    vse32.v v14, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 96
-; LMULMAX2-NEXT:    vse32.v v12, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 64
-; LMULMAX2-NEXT:    vse32.v v10, (a1)
-; LMULMAX2-NEXT:    addi a0, a0, 32
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ret_split_v128i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a2, a1, 16
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v8, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 32
-; LMULMAX1-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 48
-; LMULMAX1-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 64
-; LMULMAX1-NEXT:    vle32.v v11, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 80
-; LMULMAX1-NEXT:    vle32.v v12, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 96
-; LMULMAX1-NEXT:    vle32.v v13, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 112
-; LMULMAX1-NEXT:    vle32.v v14, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 128
-; LMULMAX1-NEXT:    vle32.v v15, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 144
-; LMULMAX1-NEXT:    vle32.v v16, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 160
-; LMULMAX1-NEXT:    vle32.v v17, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 176
-; LMULMAX1-NEXT:    vle32.v v18, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 192
-; LMULMAX1-NEXT:    vle32.v v19, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 208
-; LMULMAX1-NEXT:    vle32.v v20, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 224
-; LMULMAX1-NEXT:    vle32.v v21, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 240
-; LMULMAX1-NEXT:    vle32.v v22, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 256
-; LMULMAX1-NEXT:    vle32.v v23, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 272
-; LMULMAX1-NEXT:    vle32.v v24, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 288
-; LMULMAX1-NEXT:    vle32.v v25, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 304
-; LMULMAX1-NEXT:    vle32.v v26, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 320
-; LMULMAX1-NEXT:    vle32.v v27, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 336
-; LMULMAX1-NEXT:    vle32.v v28, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 352
-; LMULMAX1-NEXT:    vle32.v v29, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 368
-; LMULMAX1-NEXT:    vle32.v v30, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 384
-; LMULMAX1-NEXT:    vle32.v v31, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 400
-; LMULMAX1-NEXT:    vle32.v v0, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 416
-; LMULMAX1-NEXT:    vle32.v v1, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 432
-; LMULMAX1-NEXT:    vle32.v v2, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 448
-; LMULMAX1-NEXT:    vle32.v v3, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 464
-; LMULMAX1-NEXT:    vle32.v v4, (a2)
-; LMULMAX1-NEXT:    addi a2, a1, 480
-; LMULMAX1-NEXT:    vle32.v v5, (a1)
-; LMULMAX1-NEXT:    addi a1, a1, 496
-; LMULMAX1-NEXT:    vle32.v v6, (a1)
-; LMULMAX1-NEXT:    vle32.v v7, (a2)
-; LMULMAX1-NEXT:    vse32.v v5, (a0)
-; LMULMAX1-NEXT:    addi a1, a0, 496
-; LMULMAX1-NEXT:    vse32.v v6, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 480
-; LMULMAX1-NEXT:    vse32.v v7, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 464
-; LMULMAX1-NEXT:    vse32.v v4, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 448
-; LMULMAX1-NEXT:    vse32.v v3, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 432
-; LMULMAX1-NEXT:    vse32.v v2, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 416
-; LMULMAX1-NEXT:    vse32.v v1, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 400
-; LMULMAX1-NEXT:    vse32.v v0, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 384
-; LMULMAX1-NEXT:    vse32.v v31, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 368
-; LMULMAX1-NEXT:    vse32.v v30, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 352
-; LMULMAX1-NEXT:    vse32.v v29, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 336
-; LMULMAX1-NEXT:    vse32.v v28, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 320
-; LMULMAX1-NEXT:    vse32.v v27, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 304
-; LMULMAX1-NEXT:    vse32.v v26, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 288
-; LMULMAX1-NEXT:    vse32.v v25, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 272
-; LMULMAX1-NEXT:    vse32.v v24, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 256
-; LMULMAX1-NEXT:    vse32.v v23, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 240
-; LMULMAX1-NEXT:    vse32.v v22, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 224
-; LMULMAX1-NEXT:    vse32.v v21, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 208
-; LMULMAX1-NEXT:    vse32.v v20, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 192
-; LMULMAX1-NEXT:    vse32.v v19, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 176
-; LMULMAX1-NEXT:    vse32.v v18, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 160
-; LMULMAX1-NEXT:    vse32.v v17, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 144
-; LMULMAX1-NEXT:    vse32.v v16, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 128
-; LMULMAX1-NEXT:    vse32.v v15, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 112
-; LMULMAX1-NEXT:    vse32.v v14, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 96
-; LMULMAX1-NEXT:    vse32.v v13, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 80
-; LMULMAX1-NEXT:    vse32.v v12, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 64
-; LMULMAX1-NEXT:    vse32.v v11, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 48
-; LMULMAX1-NEXT:    vse32.v v10, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 32
-; LMULMAX1-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ret_split_v128i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, a1, 128
+; CHECK-NEXT:    li a3, 32
+; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a2)
+; CHECK-NEXT:    addi a2, a1, 256
+; CHECK-NEXT:    vle32.v v16, (a1)
+; CHECK-NEXT:    addi a1, a1, 384
+; CHECK-NEXT:    vle32.v v24, (a1)
+; CHECK-NEXT:    vle32.v v0, (a2)
+; CHECK-NEXT:    vse32.v v16, (a0)
+; CHECK-NEXT:    addi a1, a0, 384
+; CHECK-NEXT:    vse32.v v24, (a1)
+; CHECK-NEXT:    addi a1, a0, 256
+; CHECK-NEXT:    vse32.v v0, (a1)
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %v = load <128 x i32>, ptr %x
   ret <128 x i32> %v
 }
@@ -508,30 +122,11 @@ define <4 x i8> @ret_v4i8_param_v4i8_v4i8(<4 x i8> %v, <4 x i8> %w) {
 }
 
 define <4 x i64> @ret_v4i64_param_v4i64_v4i64(<4 x i64> %v, <4 x i64> %w) {
-; LMULMAX8-LABEL: ret_v4i64_param_v4i64_v4i64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: ret_v4i64_param_v4i64_v4i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX4-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX4-NEXT:    ret
-;
-; LMULMAX2-LABEL: ret_v4i64_param_v4i64_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ret_v4i64_param_v4i64_v4i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-NEXT:    vadd.vv v9, v9, v11
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ret_v4i64_param_v4i64_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    ret
   %r = add <4 x i64> %v, %w
   ret <4 x i64> %r
 }
@@ -547,130 +142,26 @@ define <8 x i1> @ret_v8i1_param_v8i1_v8i1(<8 x i1> %v, <8 x i1> %w) {
 }
 
 define <32 x i1> @ret_v32i1_param_v32i1_v32i1(<32 x i1> %v, <32 x i1> %w) {
-; LMULMAX8-LABEL: ret_v32i1_param_v32i1_v32i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 32
-; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; LMULMAX8-NEXT:    vmand.mm v0, v0, v8
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: ret_v32i1_param_v32i1_v32i1:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    li a0, 32
-; LMULMAX4-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; LMULMAX4-NEXT:    vmand.mm v0, v0, v8
-; LMULMAX4-NEXT:    ret
-;
-; LMULMAX2-LABEL: ret_v32i1_param_v32i1_v32i1:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a0, 32
-; LMULMAX2-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vmand.mm v0, v0, v8
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ret_v32i1_param_v32i1_v32i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmand.mm v0, v0, v9
-; LMULMAX1-NEXT:    vmand.mm v8, v8, v10
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ret_v32i1_param_v32i1_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    ret
   %r = and <32 x i1> %v, %w
   ret <32 x i1> %r
 }
 
 define <32 x i32> @ret_v32i32_param_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %w) {
-; LMULMAX8-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a2, 32
-; LMULMAX8-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; LMULMAX8-NEXT:    vle32.v v24, (a0)
-; LMULMAX8-NEXT:    vadd.vv v8, v8, v16
-; LMULMAX8-NEXT:    vadd.vv v8, v8, v24
-; LMULMAX8-NEXT:    vadd.vx v8, v8, a1
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    addi a1, a0, 64
-; LMULMAX4-NEXT:    vle32.v v24, (a1)
-; LMULMAX4-NEXT:    vle32.v v28, (a0)
-; LMULMAX4-NEXT:    vadd.vv v8, v8, v16
-; LMULMAX4-NEXT:    vadd.vv v12, v12, v20
-; LMULMAX4-NEXT:    vadd.vv v12, v12, v24
-; LMULMAX4-NEXT:    vadd.vv v8, v8, v28
-; LMULMAX4-NEXT:    vadd.vx v8, v8, a2
-; LMULMAX4-NEXT:    vadd.vx v12, v12, a2
-; LMULMAX4-NEXT:    ret
-;
-; LMULMAX2-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v24, (a0)
-; LMULMAX2-NEXT:    addi a1, a0, 32
-; LMULMAX2-NEXT:    vle32.v v26, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 64
-; LMULMAX2-NEXT:    vle32.v v28, (a1)
-; LMULMAX2-NEXT:    addi a0, a0, 96
-; LMULMAX2-NEXT:    vle32.v v30, (a0)
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v16
-; LMULMAX2-NEXT:    vadd.vv v10, v10, v18
-; LMULMAX2-NEXT:    vadd.vv v12, v12, v20
-; LMULMAX2-NEXT:    vadd.vv v14, v14, v22
-; LMULMAX2-NEXT:    vadd.vv v14, v14, v30
-; LMULMAX2-NEXT:    vadd.vv v12, v12, v28
-; LMULMAX2-NEXT:    vadd.vv v10, v10, v26
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v24
-; LMULMAX2-NEXT:    vadd.vx v8, v8, a4
-; LMULMAX2-NEXT:    vadd.vx v10, v10, a4
-; LMULMAX2-NEXT:    vadd.vx v12, v12, a4
-; LMULMAX2-NEXT:    vadd.vx v14, v14, a4
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v24, (a0)
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle32.v v25, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 32
-; LMULMAX1-NEXT:    vle32.v v26, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 48
-; LMULMAX1-NEXT:    vle32.v v27, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 64
-; LMULMAX1-NEXT:    vle32.v v28, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 80
-; LMULMAX1-NEXT:    vle32.v v29, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 96
-; LMULMAX1-NEXT:    vle32.v v30, (a1)
-; LMULMAX1-NEXT:    addi a0, a0, 112
-; LMULMAX1-NEXT:    vle32.v v31, (a0)
-; LMULMAX1-NEXT:    lw a0, 0(sp)
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v16
-; LMULMAX1-NEXT:    vadd.vv v9, v9, v17
-; LMULMAX1-NEXT:    vadd.vv v10, v10, v18
-; LMULMAX1-NEXT:    vadd.vv v11, v11, v19
-; LMULMAX1-NEXT:    vadd.vv v12, v12, v20
-; LMULMAX1-NEXT:    vadd.vv v13, v13, v21
-; LMULMAX1-NEXT:    vadd.vv v14, v14, v22
-; LMULMAX1-NEXT:    vadd.vv v15, v15, v23
-; LMULMAX1-NEXT:    vadd.vv v15, v15, v31
-; LMULMAX1-NEXT:    vadd.vv v14, v14, v30
-; LMULMAX1-NEXT:    vadd.vv v13, v13, v29
-; LMULMAX1-NEXT:    vadd.vv v12, v12, v28
-; LMULMAX1-NEXT:    vadd.vv v11, v11, v27
-; LMULMAX1-NEXT:    vadd.vv v10, v10, v26
-; LMULMAX1-NEXT:    vadd.vv v9, v9, v25
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v24
-; LMULMAX1-NEXT:    vadd.vx v8, v8, a0
-; LMULMAX1-NEXT:    vadd.vx v9, v9, a0
-; LMULMAX1-NEXT:    vadd.vx v10, v10, a0
-; LMULMAX1-NEXT:    vadd.vx v11, v11, a0
-; LMULMAX1-NEXT:    vadd.vx v12, v12, a0
-; LMULMAX1-NEXT:    vadd.vx v13, v13, a0
-; LMULMAX1-NEXT:    vadd.vx v14, v14, a0
-; LMULMAX1-NEXT:    vadd.vx v15, v15, a0
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT:    vle32.v v24, (a0)
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    vadd.vx v8, v8, a1
+; CHECK-NEXT:    ret
   %r = add <32 x i32> %x, %y
   %s = add <32 x i32> %r, %z
   %head = insertelement <32 x i32> poison, i32 %w, i32 0
@@ -683,499 +174,105 @@ declare <32 x i32> @ext2(<32 x i32>, <32 x i32>, i32, i32)
 declare <32 x i32> @ext3(<32 x i32>, <32 x i32>, <32 x i32>, i32, i32)
 
 define <32 x i32> @ret_v32i32_call_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, i32 %w) {
-; LMULMAX8-LABEL: ret_v32i32_call_v32i32_v32i32_i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    addi sp, sp, -16
-; LMULMAX8-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX8-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; LMULMAX8-NEXT:    .cfi_offset ra, -8
-; LMULMAX8-NEXT:    vmv8r.v v24, v8
-; LMULMAX8-NEXT:    li a1, 2
-; LMULMAX8-NEXT:    vmv8r.v v8, v16
-; LMULMAX8-NEXT:    vmv8r.v v16, v24
-; LMULMAX8-NEXT:    call ext2
-; LMULMAX8-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; LMULMAX8-NEXT:    addi sp, sp, 16
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: ret_v32i32_call_v32i32_v32i32_i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    addi sp, sp, -16
-; LMULMAX4-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX4-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; LMULMAX4-NEXT:    .cfi_offset ra, -8
-; LMULMAX4-NEXT:    vmv4r.v v24, v12
-; LMULMAX4-NEXT:    vmv4r.v v28, v8
-; LMULMAX4-NEXT:    li a1, 2
-; LMULMAX4-NEXT:    vmv4r.v v8, v16
-; LMULMAX4-NEXT:    vmv4r.v v12, v20
-; LMULMAX4-NEXT:    vmv4r.v v16, v28
-; LMULMAX4-NEXT:    vmv4r.v v20, v24
-; LMULMAX4-NEXT:    call ext2
-; LMULMAX4-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; LMULMAX4-NEXT:    addi sp, sp, 16
-; LMULMAX4-NEXT:    ret
-;
-; LMULMAX2-LABEL: ret_v32i32_call_v32i32_v32i32_i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    addi sp, sp, -16
-; LMULMAX2-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX2-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; LMULMAX2-NEXT:    .cfi_offset ra, -8
-; LMULMAX2-NEXT:    vmv2r.v v24, v14
-; LMULMAX2-NEXT:    vmv2r.v v26, v12
-; LMULMAX2-NEXT:    vmv2r.v v28, v10
-; LMULMAX2-NEXT:    vmv2r.v v30, v8
-; LMULMAX2-NEXT:    li a1, 2
-; LMULMAX2-NEXT:    vmv2r.v v8, v16
-; LMULMAX2-NEXT:    vmv2r.v v10, v18
-; LMULMAX2-NEXT:    vmv2r.v v12, v20
-; LMULMAX2-NEXT:    vmv2r.v v14, v22
-; LMULMAX2-NEXT:    vmv2r.v v16, v30
-; LMULMAX2-NEXT:    vmv2r.v v18, v28
-; LMULMAX2-NEXT:    vmv2r.v v20, v26
-; LMULMAX2-NEXT:    vmv2r.v v22, v24
-; LMULMAX2-NEXT:    call ext2
-; LMULMAX2-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; LMULMAX2-NEXT:    addi sp, sp, 16
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ret_v32i32_call_v32i32_v32i32_i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi sp, sp, -16
-; LMULMAX1-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX1-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; LMULMAX1-NEXT:    .cfi_offset ra, -8
-; LMULMAX1-NEXT:    vmv1r.v v24, v15
-; LMULMAX1-NEXT:    vmv1r.v v25, v14
-; LMULMAX1-NEXT:    vmv1r.v v26, v13
-; LMULMAX1-NEXT:    vmv1r.v v27, v12
-; LMULMAX1-NEXT:    vmv1r.v v28, v11
-; LMULMAX1-NEXT:    vmv1r.v v29, v10
-; LMULMAX1-NEXT:    vmv1r.v v30, v9
-; LMULMAX1-NEXT:    vmv1r.v v31, v8
-; LMULMAX1-NEXT:    li a1, 2
-; LMULMAX1-NEXT:    vmv1r.v v8, v16
-; LMULMAX1-NEXT:    vmv1r.v v9, v17
-; LMULMAX1-NEXT:    vmv1r.v v10, v18
-; LMULMAX1-NEXT:    vmv1r.v v11, v19
-; LMULMAX1-NEXT:    vmv1r.v v12, v20
-; LMULMAX1-NEXT:    vmv1r.v v13, v21
-; LMULMAX1-NEXT:    vmv1r.v v14, v22
-; LMULMAX1-NEXT:    vmv1r.v v15, v23
-; LMULMAX1-NEXT:    vmv1r.v v16, v31
-; LMULMAX1-NEXT:    vmv1r.v v17, v30
-; LMULMAX1-NEXT:    vmv1r.v v18, v29
-; LMULMAX1-NEXT:    vmv1r.v v19, v28
-; LMULMAX1-NEXT:    vmv1r.v v20, v27
-; LMULMAX1-NEXT:    vmv1r.v v21, v26
-; LMULMAX1-NEXT:    vmv1r.v v22, v25
-; LMULMAX1-NEXT:    vmv1r.v v23, v24
-; LMULMAX1-NEXT:    call ext2
-; LMULMAX1-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT:    addi sp, sp, 16
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ret_v32i32_call_v32i32_v32i32_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset ra, -8
+; CHECK-NEXT:    vmv8r.v v24, v8
+; CHECK-NEXT:    li a1, 2
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    vmv8r.v v16, v24
+; CHECK-NEXT:    call ext2
+; CHECK-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
   %t = call <32 x i32> @ext2(<32 x i32> %y, <32 x i32> %x, i32 %w, i32 2)
   ret <32 x i32> %t
 }
 
 define <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %w) {
-; LMULMAX8-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    addi sp, sp, -256
-; LMULMAX8-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX8-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX8-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
-; LMULMAX8-NEXT:    .cfi_offset ra, -8
-; LMULMAX8-NEXT:    .cfi_offset s0, -16
-; LMULMAX8-NEXT:    addi s0, sp, 256
-; LMULMAX8-NEXT:    .cfi_def_cfa s0, 0
-; LMULMAX8-NEXT:    andi sp, sp, -128
-; LMULMAX8-NEXT:    li a2, 32
-; LMULMAX8-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; LMULMAX8-NEXT:    vle32.v v24, (a0)
-; LMULMAX8-NEXT:    mv a3, sp
-; LMULMAX8-NEXT:    mv a0, sp
-; LMULMAX8-NEXT:    li a2, 42
-; LMULMAX8-NEXT:    vse32.v v8, (a3)
-; LMULMAX8-NEXT:    vmv.v.v v8, v24
-; LMULMAX8-NEXT:    call ext3
-; LMULMAX8-NEXT:    addi sp, s0, -256
-; LMULMAX8-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX8-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX8-NEXT:    addi sp, sp, 256
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    addi sp, sp, -256
-; LMULMAX4-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX4-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX4-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
-; LMULMAX4-NEXT:    .cfi_offset ra, -8
-; LMULMAX4-NEXT:    .cfi_offset s0, -16
-; LMULMAX4-NEXT:    addi s0, sp, 256
-; LMULMAX4-NEXT:    .cfi_def_cfa s0, 0
-; LMULMAX4-NEXT:    andi sp, sp, -128
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vle32.v v24, (a0)
-; LMULMAX4-NEXT:    addi a0, a0, 64
-; LMULMAX4-NEXT:    vle32.v v28, (a0)
-; LMULMAX4-NEXT:    addi a0, sp, 64
-; LMULMAX4-NEXT:    vse32.v v12, (a0)
-; LMULMAX4-NEXT:    mv a1, sp
-; LMULMAX4-NEXT:    mv a0, sp
-; LMULMAX4-NEXT:    li a3, 42
-; LMULMAX4-NEXT:    vse32.v v8, (a1)
-; LMULMAX4-NEXT:    vmv.v.v v8, v24
-; LMULMAX4-NEXT:    vmv.v.v v12, v28
-; LMULMAX4-NEXT:    call ext3
-; LMULMAX4-NEXT:    addi sp, s0, -256
-; LMULMAX4-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX4-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX4-NEXT:    addi sp, sp, 256
-; LMULMAX4-NEXT:    ret
-;
-; LMULMAX2-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    addi sp, sp, -256
-; LMULMAX2-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX2-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX2-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
-; LMULMAX2-NEXT:    .cfi_offset ra, -8
-; LMULMAX2-NEXT:    .cfi_offset s0, -16
-; LMULMAX2-NEXT:    addi s0, sp, 256
-; LMULMAX2-NEXT:    .cfi_def_cfa s0, 0
-; LMULMAX2-NEXT:    andi sp, sp, -128
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v24, (a0)
-; LMULMAX2-NEXT:    addi a1, a0, 32
-; LMULMAX2-NEXT:    vle32.v v26, (a1)
-; LMULMAX2-NEXT:    addi a1, a0, 64
-; LMULMAX2-NEXT:    vle32.v v28, (a1)
-; LMULMAX2-NEXT:    addi a0, a0, 96
-; LMULMAX2-NEXT:    vle32.v v30, (a0)
-; LMULMAX2-NEXT:    addi a0, sp, 96
-; LMULMAX2-NEXT:    vse32.v v14, (a0)
-; LMULMAX2-NEXT:    addi a0, sp, 64
-; LMULMAX2-NEXT:    vse32.v v12, (a0)
-; LMULMAX2-NEXT:    addi a0, sp, 32
-; LMULMAX2-NEXT:    vse32.v v10, (a0)
-; LMULMAX2-NEXT:    mv a1, sp
-; LMULMAX2-NEXT:    mv a0, sp
-; LMULMAX2-NEXT:    li a5, 42
-; LMULMAX2-NEXT:    vse32.v v8, (a1)
-; LMULMAX2-NEXT:    vmv.v.v v8, v24
-; LMULMAX2-NEXT:    vmv.v.v v10, v26
-; LMULMAX2-NEXT:    vmv.v.v v12, v28
-; LMULMAX2-NEXT:    vmv.v.v v14, v30
-; LMULMAX2-NEXT:    call ext3
-; LMULMAX2-NEXT:    addi sp, s0, -256
-; LMULMAX2-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX2-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX2-NEXT:    addi sp, sp, 256
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi sp, sp, -256
-; LMULMAX1-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX1-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX1-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
-; LMULMAX1-NEXT:    sd s1, 232(sp) # 8-byte Folded Spill
-; LMULMAX1-NEXT:    .cfi_offset ra, -8
-; LMULMAX1-NEXT:    .cfi_offset s0, -16
-; LMULMAX1-NEXT:    .cfi_offset s1, -24
-; LMULMAX1-NEXT:    addi s0, sp, 256
-; LMULMAX1-NEXT:    .cfi_def_cfa s0, 0
-; LMULMAX1-NEXT:    andi sp, sp, -128
-; LMULMAX1-NEXT:    mv s1, sp
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v24, (a0)
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle32.v v25, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 32
-; LMULMAX1-NEXT:    vle32.v v26, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 48
-; LMULMAX1-NEXT:    vle32.v v27, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 64
-; LMULMAX1-NEXT:    vle32.v v28, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 80
-; LMULMAX1-NEXT:    vle32.v v29, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 96
-; LMULMAX1-NEXT:    vle32.v v30, (a1)
-; LMULMAX1-NEXT:    addi a0, a0, 112
-; LMULMAX1-NEXT:    vle32.v v31, (a0)
-; LMULMAX1-NEXT:    ld a1, 0(s0)
-; LMULMAX1-NEXT:    addi sp, sp, -16
-; LMULMAX1-NEXT:    addi a0, s1, 112
-; LMULMAX1-NEXT:    vse32.v v15, (a0)
-; LMULMAX1-NEXT:    addi a0, s1, 96
-; LMULMAX1-NEXT:    vse32.v v14, (a0)
-; LMULMAX1-NEXT:    addi a0, s1, 80
-; LMULMAX1-NEXT:    vse32.v v13, (a0)
-; LMULMAX1-NEXT:    addi a0, s1, 64
-; LMULMAX1-NEXT:    vse32.v v12, (a0)
-; LMULMAX1-NEXT:    addi a0, s1, 48
-; LMULMAX1-NEXT:    vse32.v v11, (a0)
-; LMULMAX1-NEXT:    addi a0, s1, 32
-; LMULMAX1-NEXT:    vse32.v v10, (a0)
-; LMULMAX1-NEXT:    addi a0, s1, 16
-; LMULMAX1-NEXT:    vse32.v v9, (a0)
-; LMULMAX1-NEXT:    mv a0, s1
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    li a0, 42
-; LMULMAX1-NEXT:    sd a0, 8(sp)
-; LMULMAX1-NEXT:    mv a0, s1
-; LMULMAX1-NEXT:    sd a1, 0(sp)
-; LMULMAX1-NEXT:    vmv.v.v v8, v24
-; LMULMAX1-NEXT:    vmv.v.v v9, v25
-; LMULMAX1-NEXT:    vmv.v.v v10, v26
-; LMULMAX1-NEXT:    vmv.v.v v11, v27
-; LMULMAX1-NEXT:    vmv.v.v v12, v28
-; LMULMAX1-NEXT:    vmv.v.v v13, v29
-; LMULMAX1-NEXT:    vmv.v.v v14, v30
-; LMULMAX1-NEXT:    vmv.v.v v15, v31
-; LMULMAX1-NEXT:    call ext3
-; LMULMAX1-NEXT:    addi sp, sp, 16
-; LMULMAX1-NEXT:    addi sp, s0, -256
-; LMULMAX1-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT:    ld s1, 232(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT:    addi sp, sp, 256
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -256
+; CHECK-NEXT:    .cfi_def_cfa_offset 256
+; CHECK-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset ra, -8
+; CHECK-NEXT:    .cfi_offset s0, -16
+; CHECK-NEXT:    addi s0, sp, 256
+; CHECK-NEXT:    .cfi_def_cfa s0, 0
+; CHECK-NEXT:    andi sp, sp, -128
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT:    vle32.v v24, (a0)
+; CHECK-NEXT:    mv a3, sp
+; CHECK-NEXT:    mv a0, sp
+; CHECK-NEXT:    li a2, 42
+; CHECK-NEXT:    vse32.v v8, (a3)
+; CHECK-NEXT:    vmv.v.v v8, v24
+; CHECK-NEXT:    call ext3
+; CHECK-NEXT:    addi sp, s0, -256
+; CHECK-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 256
+; CHECK-NEXT:    ret
   %t = call <32 x i32> @ext3(<32 x i32> %z, <32 x i32> %y, <32 x i32> %x, i32 %w, i32 42)
   ret <32 x i32> %t
 }
 
 ; Test various configurations of split vector types where the values are split
 ; across both registers and the stack.
-; LMUL8: Ins: v8,v9,v10,v11,v12, v16m8 y[0:31], a0+0 z[0:31]
-; LMUL4: Ins: v8,v9,v10,v11,v12, v16m4 y[0:15], v20m4 y[16:31], a0+0 z[0:15],
 ;             a0+64 z[16:31]
-; LMUL2: Ins: v8,v9,v10,v11,v12, v14m2 y[0:7], v16m2 y[8:15], v18m2 y[16:23],
 ;             v20m2 y[24:31], v22m2 z[0:7], a1+0 z[8:15], a1+32 z[16:23],
 ;             a1+64 z[24:31]
-; LMUL1: Ins: v8,v9,v10,v11,v12, v13 y[0:3], v14 y[4:7], v15 y[8:11],
 ;             v16 y[12:15], v17 y[16:19], v18 y[20:23], v19 y[24:27],
 ;             v20 y[28:31], v21 z[0:3], v22 z[4:7], v23 z[8:11],
 ;             a1+0 z[12:15], a1+16 z[16:19], a1+32 z[20:23], a1+48 z[24:27],
 ;             a1+64 z[28:31]
 define <32 x i32> @split_vector_args(<2 x i32>,<2 x i32>,<2 x i32>,<2 x i32>,<2 x i32>, <32 x i32> %y, <32 x i32> %z) {
-; LMULMAX8-LABEL: split_vector_args:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a1, 32
-; LMULMAX8-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    vadd.vv v8, v16, v8
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: split_vector_args:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    addi a1, a0, 64
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vle32.v v8, (a0)
-; LMULMAX4-NEXT:    vle32.v v12, (a1)
-; LMULMAX4-NEXT:    vadd.vv v8, v16, v8
-; LMULMAX4-NEXT:    vadd.vv v12, v20, v12
-; LMULMAX4-NEXT:    ret
-;
-; LMULMAX2-LABEL: split_vector_args:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    addi a1, a0, 64
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v10, (a0)
-; LMULMAX2-NEXT:    addi a0, a0, 32
-; LMULMAX2-NEXT:    vle32.v v12, (a0)
-; LMULMAX2-NEXT:    vle32.v v24, (a1)
-; LMULMAX2-NEXT:    vadd.vv v8, v14, v22
-; LMULMAX2-NEXT:    vadd.vv v10, v16, v10
-; LMULMAX2-NEXT:    vadd.vv v12, v18, v12
-; LMULMAX2-NEXT:    vadd.vv v14, v20, v24
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: split_vector_args:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a1, a0, 64
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v24, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 48
-; LMULMAX1-NEXT:    vle32.v v25, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 32
-; LMULMAX1-NEXT:    vle32.v v26, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle32.v v12, (a1)
-; LMULMAX1-NEXT:    vle32.v v11, (a0)
-; LMULMAX1-NEXT:    vadd.vv v8, v13, v21
-; LMULMAX1-NEXT:    vadd.vv v9, v14, v22
-; LMULMAX1-NEXT:    vadd.vv v10, v15, v23
-; LMULMAX1-NEXT:    vadd.vv v11, v16, v11
-; LMULMAX1-NEXT:    vadd.vv v12, v17, v12
-; LMULMAX1-NEXT:    vadd.vv v13, v18, v26
-; LMULMAX1-NEXT:    vadd.vv v14, v19, v25
-; LMULMAX1-NEXT:    vadd.vv v15, v20, v24
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: split_vector_args:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vadd.vv v8, v16, v8
+; CHECK-NEXT:    ret
   %v0 = add <32 x i32> %y, %z
   ret <32 x i32> %v0
 }
 
 define <32 x i32> @call_split_vector_args(ptr %pa, ptr %pb) {
-; LMULMAX8-LABEL: call_split_vector_args:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    addi sp, sp, -256
-; LMULMAX8-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX8-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX8-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
-; LMULMAX8-NEXT:    .cfi_offset ra, -8
-; LMULMAX8-NEXT:    .cfi_offset s0, -16
-; LMULMAX8-NEXT:    addi s0, sp, 256
-; LMULMAX8-NEXT:    .cfi_def_cfa s0, 0
-; LMULMAX8-NEXT:    andi sp, sp, -128
-; LMULMAX8-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    li a0, 32
-; LMULMAX8-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; LMULMAX8-NEXT:    vle32.v v16, (a1)
-; LMULMAX8-NEXT:    mv a1, sp
-; LMULMAX8-NEXT:    mv a0, sp
-; LMULMAX8-NEXT:    vse32.v v16, (a1)
-; LMULMAX8-NEXT:    vmv1r.v v9, v8
-; LMULMAX8-NEXT:    vmv1r.v v10, v8
-; LMULMAX8-NEXT:    vmv1r.v v11, v8
-; LMULMAX8-NEXT:    vmv1r.v v12, v8
-; LMULMAX8-NEXT:    call split_vector_args
-; LMULMAX8-NEXT:    addi sp, s0, -256
-; LMULMAX8-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX8-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX8-NEXT:    addi sp, sp, 256
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: call_split_vector_args:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    addi sp, sp, -256
-; LMULMAX4-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX4-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX4-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
-; LMULMAX4-NEXT:    .cfi_offset ra, -8
-; LMULMAX4-NEXT:    .cfi_offset s0, -16
-; LMULMAX4-NEXT:    addi s0, sp, 256
-; LMULMAX4-NEXT:    .cfi_def_cfa s0, 0
-; LMULMAX4-NEXT:    andi sp, sp, -128
-; LMULMAX4-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX4-NEXT:    vle32.v v8, (a0)
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vle32.v v16, (a1)
-; LMULMAX4-NEXT:    addi a0, a1, 64
-; LMULMAX4-NEXT:    vle32.v v20, (a0)
-; LMULMAX4-NEXT:    addi a0, sp, 64
-; LMULMAX4-NEXT:    vse32.v v20, (a0)
-; LMULMAX4-NEXT:    mv a1, sp
-; LMULMAX4-NEXT:    mv a0, sp
-; LMULMAX4-NEXT:    vse32.v v16, (a1)
-; LMULMAX4-NEXT:    vmv1r.v v9, v8
-; LMULMAX4-NEXT:    vmv1r.v v10, v8
-; LMULMAX4-NEXT:    vmv1r.v v11, v8
-; LMULMAX4-NEXT:    vmv1r.v v12, v8
-; LMULMAX4-NEXT:    call split_vector_args
-; LMULMAX4-NEXT:    addi sp, s0, -256
-; LMULMAX4-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX4-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX4-NEXT:    addi sp, sp, 256
-; LMULMAX4-NEXT:    ret
-;
-; LMULMAX2-LABEL: call_split_vector_args:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    addi sp, sp, -128
-; LMULMAX2-NEXT:    .cfi_def_cfa_offset 128
-; LMULMAX2-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
-; LMULMAX2-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
-; LMULMAX2-NEXT:    .cfi_offset ra, -8
-; LMULMAX2-NEXT:    .cfi_offset s0, -16
-; LMULMAX2-NEXT:    addi s0, sp, 128
-; LMULMAX2-NEXT:    .cfi_def_cfa s0, 0
-; LMULMAX2-NEXT:    andi sp, sp, -128
-; LMULMAX2-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v14, (a1)
-; LMULMAX2-NEXT:    addi a0, a1, 32
-; LMULMAX2-NEXT:    vle32.v v16, (a0)
-; LMULMAX2-NEXT:    addi a0, a1, 64
-; LMULMAX2-NEXT:    vle32.v v18, (a0)
-; LMULMAX2-NEXT:    addi a0, a1, 96
-; LMULMAX2-NEXT:    vle32.v v20, (a0)
-; LMULMAX2-NEXT:    addi a0, sp, 64
-; LMULMAX2-NEXT:    vse32.v v20, (a0)
-; LMULMAX2-NEXT:    addi a0, sp, 32
-; LMULMAX2-NEXT:    vse32.v v18, (a0)
-; LMULMAX2-NEXT:    mv a1, sp
-; LMULMAX2-NEXT:    mv a0, sp
-; LMULMAX2-NEXT:    vse32.v v16, (a1)
-; LMULMAX2-NEXT:    vmv1r.v v9, v8
-; LMULMAX2-NEXT:    vmv1r.v v10, v8
-; LMULMAX2-NEXT:    vmv1r.v v11, v8
-; LMULMAX2-NEXT:    vmv1r.v v12, v8
-; LMULMAX2-NEXT:    vmv.v.v v22, v14
-; LMULMAX2-NEXT:    call split_vector_args
-; LMULMAX2-NEXT:    addi sp, s0, -128
-; LMULMAX2-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
-; LMULMAX2-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
-; LMULMAX2-NEXT:    addi sp, sp, 128
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: call_split_vector_args:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi sp, sp, -128
-; LMULMAX1-NEXT:    .cfi_def_cfa_offset 128
-; LMULMAX1-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
-; LMULMAX1-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
-; LMULMAX1-NEXT:    .cfi_offset ra, -8
-; LMULMAX1-NEXT:    .cfi_offset s0, -16
-; LMULMAX1-NEXT:    addi s0, sp, 128
-; LMULMAX1-NEXT:    .cfi_def_cfa s0, 0
-; LMULMAX1-NEXT:    andi sp, sp, -128
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v13, (a1)
-; LMULMAX1-NEXT:    addi a0, a1, 32
-; LMULMAX1-NEXT:    vle32.v v15, (a0)
-; LMULMAX1-NEXT:    addi a0, a1, 16
-; LMULMAX1-NEXT:    vle32.v v14, (a0)
-; LMULMAX1-NEXT:    addi a0, a1, 48
-; LMULMAX1-NEXT:    vle32.v v16, (a0)
-; LMULMAX1-NEXT:    addi a0, a1, 64
-; LMULMAX1-NEXT:    vle32.v v17, (a0)
-; LMULMAX1-NEXT:    addi a0, a1, 80
-; LMULMAX1-NEXT:    vle32.v v18, (a0)
-; LMULMAX1-NEXT:    addi a0, a1, 96
-; LMULMAX1-NEXT:    vle32.v v19, (a0)
-; LMULMAX1-NEXT:    addi a0, a1, 112
-; LMULMAX1-NEXT:    vle32.v v20, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 64
-; LMULMAX1-NEXT:    vse32.v v20, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 48
-; LMULMAX1-NEXT:    vse32.v v19, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 32
-; LMULMAX1-NEXT:    vse32.v v18, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 16
-; LMULMAX1-NEXT:    vse32.v v17, (a0)
-; LMULMAX1-NEXT:    mv a1, sp
-; LMULMAX1-NEXT:    mv a0, sp
-; LMULMAX1-NEXT:    vse32.v v16, (a1)
-; LMULMAX1-NEXT:    vmv1r.v v9, v8
-; LMULMAX1-NEXT:    vmv1r.v v10, v8
-; LMULMAX1-NEXT:    vmv1r.v v11, v8
-; LMULMAX1-NEXT:    vmv1r.v v12, v8
-; LMULMAX1-NEXT:    vmv.v.v v21, v13
-; LMULMAX1-NEXT:    vmv.v.v v22, v14
-; LMULMAX1-NEXT:    vmv.v.v v23, v15
-; LMULMAX1-NEXT:    call split_vector_args
-; LMULMAX1-NEXT:    addi sp, s0, -128
-; LMULMAX1-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT:    addi sp, sp, 128
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: call_split_vector_args:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -256
+; CHECK-NEXT:    .cfi_def_cfa_offset 256
+; CHECK-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset ra, -8
+; CHECK-NEXT:    .cfi_offset s0, -16
+; CHECK-NEXT:    addi s0, sp, 256
+; CHECK-NEXT:    .cfi_def_cfa s0, 0
+; CHECK-NEXT:    andi sp, sp, -128
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vle32.v v16, (a1)
+; CHECK-NEXT:    mv a1, sp
+; CHECK-NEXT:    mv a0, sp
+; CHECK-NEXT:    vse32.v v16, (a1)
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    vmv1r.v v12, v8
+; CHECK-NEXT:    call split_vector_args
+; CHECK-NEXT:    addi sp, s0, -256
+; CHECK-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 256
+; CHECK-NEXT:    ret
   %a = load <2 x i32>, ptr %pa
   %b = load <32 x i32>, ptr %pb
   %r = call <32 x i32> @split_vector_args(<2 x i32> %a, <2 x i32> %a, <2 x i32> %a, <2 x i32> %a, <2 x i32> %a, <32 x i32> %b, <32 x i32> %b)
@@ -1185,217 +282,44 @@ define <32 x i32> @call_split_vector_args(ptr %pa, ptr %pb) {
 ; A rather pathological test case in which we exhaust all vector registers and
 ; all scalar registers, forcing %z and %8 to go through the stack.
 define <32 x i32> @vector_arg_via_stack(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, <32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %8) {
-; LMULMAX8-LABEL: vector_arg_via_stack:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 32
-; LMULMAX8-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; LMULMAX8-NEXT:    vle32.v v16, (sp)
-; LMULMAX8-NEXT:    vadd.vv v8, v8, v16
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: vector_arg_via_stack:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vle32.v v16, (sp)
-; LMULMAX4-NEXT:    addi a0, sp, 64
-; LMULMAX4-NEXT:    vle32.v v20, (a0)
-; LMULMAX4-NEXT:    vadd.vv v8, v8, v16
-; LMULMAX4-NEXT:    vadd.vv v12, v12, v20
-; LMULMAX4-NEXT:    ret
-;
-; LMULMAX2-LABEL: vector_arg_via_stack:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    addi a0, sp, 64
-; LMULMAX2-NEXT:    vle32.v v16, (a0)
-; LMULMAX2-NEXT:    vle32.v v18, (sp)
-; LMULMAX2-NEXT:    addi a0, sp, 32
-; LMULMAX2-NEXT:    vle32.v v20, (a0)
-; LMULMAX2-NEXT:    addi a0, sp, 96
-; LMULMAX2-NEXT:    vle32.v v22, (a0)
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v18
-; LMULMAX2-NEXT:    vadd.vv v10, v10, v20
-; LMULMAX2-NEXT:    vadd.vv v12, v12, v16
-; LMULMAX2-NEXT:    vadd.vv v14, v14, v22
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: vector_arg_via_stack:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a0, sp, 112
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v16, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 96
-; LMULMAX1-NEXT:    vle32.v v17, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 80
-; LMULMAX1-NEXT:    vle32.v v18, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 64
-; LMULMAX1-NEXT:    vle32.v v19, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 32
-; LMULMAX1-NEXT:    vle32.v v20, (a0)
-; LMULMAX1-NEXT:    vle32.v v21, (sp)
-; LMULMAX1-NEXT:    addi a0, sp, 16
-; LMULMAX1-NEXT:    vle32.v v22, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 48
-; LMULMAX1-NEXT:    vle32.v v23, (a0)
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v21
-; LMULMAX1-NEXT:    vadd.vv v9, v9, v22
-; LMULMAX1-NEXT:    vadd.vv v10, v10, v20
-; LMULMAX1-NEXT:    vadd.vv v11, v11, v23
-; LMULMAX1-NEXT:    vadd.vv v12, v12, v19
-; LMULMAX1-NEXT:    vadd.vv v13, v13, v18
-; LMULMAX1-NEXT:    vadd.vv v14, v14, v17
-; LMULMAX1-NEXT:    vadd.vv v15, v15, v16
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: vector_arg_via_stack:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vle32.v v16, (sp)
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    ret
   %s = add <32 x i32> %x, %z
   ret <32 x i32> %s
 }
 
 ; Calling the function above. Ensure we pass the arguments correctly.
 define <32 x i32> @pass_vector_arg_via_stack(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z) {
-; LMULMAX8-LABEL: pass_vector_arg_via_stack:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    addi sp, sp, -144
-; LMULMAX8-NEXT:    .cfi_def_cfa_offset 144
-; LMULMAX8-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
-; LMULMAX8-NEXT:    .cfi_offset ra, -8
-; LMULMAX8-NEXT:    li a0, 32
-; LMULMAX8-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; LMULMAX8-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-NEXT:    vse32.v v8, (sp)
-; LMULMAX8-NEXT:    li a0, 8
-; LMULMAX8-NEXT:    li a1, 1
-; LMULMAX8-NEXT:    li a2, 2
-; LMULMAX8-NEXT:    li a3, 3
-; LMULMAX8-NEXT:    li a4, 4
-; LMULMAX8-NEXT:    li a5, 5
-; LMULMAX8-NEXT:    li a6, 6
-; LMULMAX8-NEXT:    li a7, 7
-; LMULMAX8-NEXT:    sd a0, 128(sp)
-; LMULMAX8-NEXT:    li a0, 0
-; LMULMAX8-NEXT:    vmv.v.i v16, 0
-; LMULMAX8-NEXT:    call vector_arg_via_stack
-; LMULMAX8-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
-; LMULMAX8-NEXT:    addi sp, sp, 144
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: pass_vector_arg_via_stack:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    addi sp, sp, -144
-; LMULMAX4-NEXT:    .cfi_def_cfa_offset 144
-; LMULMAX4-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
-; LMULMAX4-NEXT:    .cfi_offset ra, -8
-; LMULMAX4-NEXT:    li a0, 8
-; LMULMAX4-NEXT:    sd a0, 128(sp)
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vmv.v.i v8, 0
-; LMULMAX4-NEXT:    vse32.v v8, (sp)
-; LMULMAX4-NEXT:    addi a0, sp, 64
-; LMULMAX4-NEXT:    li a1, 1
-; LMULMAX4-NEXT:    li a2, 2
-; LMULMAX4-NEXT:    li a3, 3
-; LMULMAX4-NEXT:    li a4, 4
-; LMULMAX4-NEXT:    li a5, 5
-; LMULMAX4-NEXT:    li a6, 6
-; LMULMAX4-NEXT:    li a7, 7
-; LMULMAX4-NEXT:    vse32.v v8, (a0)
-; LMULMAX4-NEXT:    li a0, 0
-; LMULMAX4-NEXT:    vmv.v.i v12, 0
-; LMULMAX4-NEXT:    vmv.v.i v16, 0
-; LMULMAX4-NEXT:    vmv.v.i v20, 0
-; LMULMAX4-NEXT:    call vector_arg_via_stack
-; LMULMAX4-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
-; LMULMAX4-NEXT:    addi sp, sp, 144
-; LMULMAX4-NEXT:    ret
-;
-; LMULMAX2-LABEL: pass_vector_arg_via_stack:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    addi sp, sp, -144
-; LMULMAX2-NEXT:    .cfi_def_cfa_offset 144
-; LMULMAX2-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
-; LMULMAX2-NEXT:    .cfi_offset ra, -8
-; LMULMAX2-NEXT:    li a0, 8
-; LMULMAX2-NEXT:    sd a0, 128(sp)
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.i v8, 0
-; LMULMAX2-NEXT:    vse32.v v8, (sp)
-; LMULMAX2-NEXT:    addi a0, sp, 96
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    addi a0, sp, 64
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    addi a0, sp, 32
-; LMULMAX2-NEXT:    li a1, 1
-; LMULMAX2-NEXT:    li a2, 2
-; LMULMAX2-NEXT:    li a3, 3
-; LMULMAX2-NEXT:    li a4, 4
-; LMULMAX2-NEXT:    li a5, 5
-; LMULMAX2-NEXT:    li a6, 6
-; LMULMAX2-NEXT:    li a7, 7
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    li a0, 0
-; LMULMAX2-NEXT:    vmv.v.i v10, 0
-; LMULMAX2-NEXT:    vmv.v.i v12, 0
-; LMULMAX2-NEXT:    vmv.v.i v14, 0
-; LMULMAX2-NEXT:    vmv.v.i v16, 0
-; LMULMAX2-NEXT:    vmv.v.i v18, 0
-; LMULMAX2-NEXT:    vmv.v.i v20, 0
-; LMULMAX2-NEXT:    vmv.v.i v22, 0
-; LMULMAX2-NEXT:    call vector_arg_via_stack
-; LMULMAX2-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
-; LMULMAX2-NEXT:    addi sp, sp, 144
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: pass_vector_arg_via_stack:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi sp, sp, -144
-; LMULMAX1-NEXT:    .cfi_def_cfa_offset 144
-; LMULMAX1-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
-; LMULMAX1-NEXT:    .cfi_offset ra, -8
-; LMULMAX1-NEXT:    li a0, 8
-; LMULMAX1-NEXT:    sd a0, 128(sp)
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vse32.v v8, (sp)
-; LMULMAX1-NEXT:    addi a0, sp, 112
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 96
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 80
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 64
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 48
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 32
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 16
-; LMULMAX1-NEXT:    li a1, 1
-; LMULMAX1-NEXT:    li a2, 2
-; LMULMAX1-NEXT:    li a3, 3
-; LMULMAX1-NEXT:    li a4, 4
-; LMULMAX1-NEXT:    li a5, 5
-; LMULMAX1-NEXT:    li a6, 6
-; LMULMAX1-NEXT:    li a7, 7
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    li a0, 0
-; LMULMAX1-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-NEXT:    vmv.v.i v10, 0
-; LMULMAX1-NEXT:    vmv.v.i v11, 0
-; LMULMAX1-NEXT:    vmv.v.i v12, 0
-; LMULMAX1-NEXT:    vmv.v.i v13, 0
-; LMULMAX1-NEXT:    vmv.v.i v14, 0
-; LMULMAX1-NEXT:    vmv.v.i v15, 0
-; LMULMAX1-NEXT:    vmv.v.i v16, 0
-; LMULMAX1-NEXT:    vmv.v.i v17, 0
-; LMULMAX1-NEXT:    vmv.v.i v18, 0
-; LMULMAX1-NEXT:    vmv.v.i v19, 0
-; LMULMAX1-NEXT:    vmv.v.i v20, 0
-; LMULMAX1-NEXT:    vmv.v.i v21, 0
-; LMULMAX1-NEXT:    vmv.v.i v22, 0
-; LMULMAX1-NEXT:    vmv.v.i v23, 0
-; LMULMAX1-NEXT:    call vector_arg_via_stack
-; LMULMAX1-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT:    addi sp, sp, 144
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: pass_vector_arg_via_stack:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -144
+; CHECK-NEXT:    .cfi_def_cfa_offset 144
+; CHECK-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset ra, -8
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vse32.v v8, (sp)
+; CHECK-NEXT:    li a0, 8
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    li a2, 2
+; CHECK-NEXT:    li a3, 3
+; CHECK-NEXT:    li a4, 4
+; CHECK-NEXT:    li a5, 5
+; CHECK-NEXT:    li a6, 6
+; CHECK-NEXT:    li a7, 7
+; CHECK-NEXT:    sd a0, 128(sp)
+; CHECK-NEXT:    li a0, 0
+; CHECK-NEXT:    vmv.v.i v16, 0
+; CHECK-NEXT:    call vector_arg_via_stack
+; CHECK-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 144
+; CHECK-NEXT:    ret
   %s = call <32 x i32> @vector_arg_via_stack(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, i32 8)
   ret <32 x i32> %s
 }
@@ -1415,194 +339,42 @@ define <4 x i1> @vector_mask_arg_via_stack(i32 %0, i32 %1, i32 %2, i32 %3, i32 %
 ; Calling the function above. Ensure we pass the mask arguments correctly. We
 ; legalize stores of small masks such that the value is at least byte-sized.
 define <4 x i1> @pass_vector_mask_arg_via_stack(<4 x i1> %v) {
-; LMULMAX8-LABEL: pass_vector_mask_arg_via_stack:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    addi sp, sp, -160
-; LMULMAX8-NEXT:    .cfi_def_cfa_offset 160
-; LMULMAX8-NEXT:    sd ra, 152(sp) # 8-byte Folded Spill
-; LMULMAX8-NEXT:    .cfi_offset ra, -8
-; LMULMAX8-NEXT:    li a0, 32
-; LMULMAX8-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; LMULMAX8-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-NEXT:    vse32.v v8, (sp)
-; LMULMAX8-NEXT:    li a0, 8
-; LMULMAX8-NEXT:    sd a0, 128(sp)
-; LMULMAX8-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8-NEXT:    vmv.v.i v16, 0
-; LMULMAX8-NEXT:    vmerge.vim v16, v16, 1, v0
-; LMULMAX8-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX8-NEXT:    vmv.v.i v17, 0
-; LMULMAX8-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; LMULMAX8-NEXT:    vmv.v.v v17, v16
-; LMULMAX8-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX8-NEXT:    vmsne.vi v16, v17, 0
-; LMULMAX8-NEXT:    addi a0, sp, 136
-; LMULMAX8-NEXT:    li a5, 5
-; LMULMAX8-NEXT:    li a6, 6
-; LMULMAX8-NEXT:    li a7, 7
-; LMULMAX8-NEXT:    vsm.v v16, (a0)
-; LMULMAX8-NEXT:    li a0, 0
-; LMULMAX8-NEXT:    li a1, 0
-; LMULMAX8-NEXT:    li a2, 0
-; LMULMAX8-NEXT:    li a3, 0
-; LMULMAX8-NEXT:    li a4, 0
-; LMULMAX8-NEXT:    vmv8r.v v16, v8
-; LMULMAX8-NEXT:    call vector_mask_arg_via_stack
-; LMULMAX8-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
-; LMULMAX8-NEXT:    addi sp, sp, 160
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX4-LABEL: pass_vector_mask_arg_via_stack:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    addi sp, sp, -160
-; LMULMAX4-NEXT:    .cfi_def_cfa_offset 160
-; LMULMAX4-NEXT:    sd ra, 152(sp) # 8-byte Folded Spill
-; LMULMAX4-NEXT:    .cfi_offset ra, -8
-; LMULMAX4-NEXT:    li a0, 8
-; LMULMAX4-NEXT:    sd a0, 128(sp)
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vmv.v.i v8, 0
-; LMULMAX4-NEXT:    vse32.v v8, (sp)
-; LMULMAX4-NEXT:    addi a0, sp, 64
-; LMULMAX4-NEXT:    vse32.v v8, (a0)
-; LMULMAX4-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX4-NEXT:    vmv.v.i v12, 0
-; LMULMAX4-NEXT:    vmerge.vim v12, v12, 1, v0
-; LMULMAX4-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX4-NEXT:    vmv.v.i v13, 0
-; LMULMAX4-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; LMULMAX4-NEXT:    vmv.v.v v13, v12
-; LMULMAX4-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX4-NEXT:    vmsne.vi v12, v13, 0
-; LMULMAX4-NEXT:    addi a0, sp, 136
-; LMULMAX4-NEXT:    li a5, 5
-; LMULMAX4-NEXT:    li a6, 6
-; LMULMAX4-NEXT:    li a7, 7
-; LMULMAX4-NEXT:    vsm.v v12, (a0)
-; LMULMAX4-NEXT:    li a0, 0
-; LMULMAX4-NEXT:    li a1, 0
-; LMULMAX4-NEXT:    li a2, 0
-; LMULMAX4-NEXT:    li a3, 0
-; LMULMAX4-NEXT:    li a4, 0
-; LMULMAX4-NEXT:    vmv4r.v v12, v8
-; LMULMAX4-NEXT:    vmv4r.v v16, v8
-; LMULMAX4-NEXT:    vmv4r.v v20, v8
-; LMULMAX4-NEXT:    call vector_mask_arg_via_stack
-; LMULMAX4-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
-; LMULMAX4-NEXT:    addi sp, sp, 160
-; LMULMAX4-NEXT:    ret
-;
-; LMULMAX2-LABEL: pass_vector_mask_arg_via_stack:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    addi sp, sp, -160
-; LMULMAX2-NEXT:    .cfi_def_cfa_offset 160
-; LMULMAX2-NEXT:    sd ra, 152(sp) # 8-byte Folded Spill
-; LMULMAX2-NEXT:    .cfi_offset ra, -8
-; LMULMAX2-NEXT:    li a0, 8
-; LMULMAX2-NEXT:    sd a0, 128(sp)
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.i v8, 0
-; LMULMAX2-NEXT:    vse32.v v8, (sp)
-; LMULMAX2-NEXT:    addi a0, sp, 96
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    addi a0, sp, 64
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    addi a0, sp, 32
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX2-NEXT:    vmv.v.i v10, 0
-; LMULMAX2-NEXT:    vmerge.vim v10, v10, 1, v0
-; LMULMAX2-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.i v11, 0
-; LMULMAX2-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; LMULMAX2-NEXT:    vmv.v.v v11, v10
-; LMULMAX2-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX2-NEXT:    vmsne.vi v10, v11, 0
-; LMULMAX2-NEXT:    addi a0, sp, 136
-; LMULMAX2-NEXT:    li a5, 5
-; LMULMAX2-NEXT:    li a6, 6
-; LMULMAX2-NEXT:    li a7, 7
-; LMULMAX2-NEXT:    vsm.v v10, (a0)
-; LMULMAX2-NEXT:    li a0, 0
-; LMULMAX2-NEXT:    li a1, 0
-; LMULMAX2-NEXT:    li a2, 0
-; LMULMAX2-NEXT:    li a3, 0
-; LMULMAX2-NEXT:    li a4, 0
-; LMULMAX2-NEXT:    vmv2r.v v10, v8
-; LMULMAX2-NEXT:    vmv2r.v v12, v8
-; LMULMAX2-NEXT:    vmv2r.v v14, v8
-; LMULMAX2-NEXT:    vmv2r.v v16, v8
-; LMULMAX2-NEXT:    vmv2r.v v18, v8
-; LMULMAX2-NEXT:    vmv2r.v v20, v8
-; LMULMAX2-NEXT:    vmv2r.v v22, v8
-; LMULMAX2-NEXT:    call vector_mask_arg_via_stack
-; LMULMAX2-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
-; LMULMAX2-NEXT:    addi sp, sp, 160
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: pass_vector_mask_arg_via_stack:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi sp, sp, -160
-; LMULMAX1-NEXT:    .cfi_def_cfa_offset 160
-; LMULMAX1-NEXT:    sd ra, 152(sp) # 8-byte Folded Spill
-; LMULMAX1-NEXT:    .cfi_offset ra, -8
-; LMULMAX1-NEXT:    li a0, 8
-; LMULMAX1-NEXT:    sd a0, 128(sp)
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vse32.v v8, (sp)
-; LMULMAX1-NEXT:    addi a0, sp, 112
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 96
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 80
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 64
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 48
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 32
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, sp, 16
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-NEXT:    vmerge.vim v9, v9, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v10, 0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; LMULMAX1-NEXT:    vmv.v.v v10, v9
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmsne.vi v9, v10, 0
-; LMULMAX1-NEXT:    addi a0, sp, 136
-; LMULMAX1-NEXT:    li a5, 5
-; LMULMAX1-NEXT:    li a6, 6
-; LMULMAX1-NEXT:    li a7, 7
-; LMULMAX1-NEXT:    vsm.v v9, (a0)
-; LMULMAX1-NEXT:    li a0, 0
-; LMULMAX1-NEXT:    li a1, 0
-; LMULMAX1-NEXT:    li a2, 0
-; LMULMAX1-NEXT:    li a3, 0
-; LMULMAX1-NEXT:    li a4, 0
-; LMULMAX1-NEXT:    vmv1r.v v9, v8
-; LMULMAX1-NEXT:    vmv1r.v v10, v8
-; LMULMAX1-NEXT:    vmv1r.v v11, v8
-; LMULMAX1-NEXT:    vmv1r.v v12, v8
-; LMULMAX1-NEXT:    vmv1r.v v13, v8
-; LMULMAX1-NEXT:    vmv1r.v v14, v8
-; LMULMAX1-NEXT:    vmv1r.v v15, v8
-; LMULMAX1-NEXT:    vmv1r.v v16, v8
-; LMULMAX1-NEXT:    vmv1r.v v17, v8
-; LMULMAX1-NEXT:    vmv1r.v v18, v8
-; LMULMAX1-NEXT:    vmv1r.v v19, v8
-; LMULMAX1-NEXT:    vmv1r.v v20, v8
-; LMULMAX1-NEXT:    vmv1r.v v21, v8
-; LMULMAX1-NEXT:    vmv1r.v v22, v8
-; LMULMAX1-NEXT:    vmv1r.v v23, v8
-; LMULMAX1-NEXT:    call vector_mask_arg_via_stack
-; LMULMAX1-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT:    addi sp, sp, 160
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: pass_vector_mask_arg_via_stack:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -160
+; CHECK-NEXT:    .cfi_def_cfa_offset 160
+; CHECK-NEXT:    sd ra, 152(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset ra, -8
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vse32.v v8, (sp)
+; CHECK-NEXT:    li a0, 8
+; CHECK-NEXT:    sd a0, 128(sp)
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.i v16, 0
+; CHECK-NEXT:    vmerge.vim v16, v16, 1, v0
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.i v17, 0
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
+; CHECK-NEXT:    vmv.v.v v17, v16
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vmsne.vi v16, v17, 0
+; CHECK-NEXT:    addi a0, sp, 136
+; CHECK-NEXT:    li a5, 5
+; CHECK-NEXT:    li a6, 6
+; CHECK-NEXT:    li a7, 7
+; CHECK-NEXT:    vsm.v v16, (a0)
+; CHECK-NEXT:    li a0, 0
+; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    li a2, 0
+; CHECK-NEXT:    li a3, 0
+; CHECK-NEXT:    li a4, 0
+; CHECK-NEXT:    vmv8r.v v16, v8
+; CHECK-NEXT:    call vector_mask_arg_via_stack
+; CHECK-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 160
+; CHECK-NEXT:    ret
   %r = call <4 x i1> @vector_mask_arg_via_stack(i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 6, i32 7, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, i32 8, <4 x i1> %v, <4 x i1> %v)
   ret <4 x i1> %r
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
index a4a2cd1333086d..3e5a89b9bce388 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -1,61 +1,71 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x,+zvl128b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32I
-; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x,+zvl128b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64I
-; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x,+zvl128b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x,+zvl128b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+zvl128b,+f -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32F
-; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+zvl128b,+f -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64F
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32D
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64D
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVI,RV32I
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVI,RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+zvl128b,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVF,RV32F
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+zvl128b,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVF,RV64F
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVD,RV32D
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVD,RV64D
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
 
 define void @ctlz_v16i8(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: ctlz_v16i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vsrl.vi v9, v8, 2
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vsrl.vi v9, v8, 4
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vnot.v v8, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    li a1, 85
-; CHECK-NEXT:    vand.vx v9, v9, a1
-; CHECK-NEXT:    vsub.vv v8, v8, v9
-; CHECK-NEXT:    li a1, 51
-; CHECK-NEXT:    vand.vx v9, v8, a1
-; CHECK-NEXT:    vsrl.vi v8, v8, 2
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vadd.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 4
-; CHECK-NEXT:    vadd.vv v8, v8, v9
-; CHECK-NEXT:    vand.vi v8, v8, 15
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    ret
-;
-; LMULMAX8-LABEL: ctlz_v16i8:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX8-NEXT:    vle8.v v8, (a0)
-; LMULMAX8-NEXT:    vzext.vf2 v10, v8
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v12, v10
-; LMULMAX8-NEXT:    vnsrl.wi v8, v12, 23
-; LMULMAX8-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; LMULMAX8-NEXT:    vnsrl.wi v10, v8, 0
-; LMULMAX8-NEXT:    li a1, 134
-; LMULMAX8-NEXT:    vrsub.vx v8, v10, a1
-; LMULMAX8-NEXT:    li a1, 8
-; LMULMAX8-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse8.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: ctlz_v16i8:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RVI-NEXT:    vle8.v v8, (a0)
+; RVI-NEXT:    vsrl.vi v9, v8, 1
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 2
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 4
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vsrl.vi v9, v8, 1
+; RVI-NEXT:    li a1, 85
+; RVI-NEXT:    vand.vx v9, v9, a1
+; RVI-NEXT:    vsub.vv v8, v8, v9
+; RVI-NEXT:    li a1, 51
+; RVI-NEXT:    vand.vx v9, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v9, v8
+; RVI-NEXT:    vsrl.vi v9, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v9
+; RVI-NEXT:    vand.vi v8, v8, 15
+; RVI-NEXT:    vse8.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: ctlz_v16i8:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RVF-NEXT:    vle8.v v8, (a0)
+; RVF-NEXT:    vzext.vf2 v10, v8
+; RVF-NEXT:    vfwcvt.f.xu.v v12, v10
+; RVF-NEXT:    vnsrl.wi v8, v12, 23
+; RVF-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; RVF-NEXT:    vnsrl.wi v10, v8, 0
+; RVF-NEXT:    li a1, 134
+; RVF-NEXT:    vrsub.vx v8, v10, a1
+; RVF-NEXT:    li a1, 8
+; RVF-NEXT:    vminu.vx v8, v8, a1
+; RVF-NEXT:    vse8.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: ctlz_v16i8:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RVD-NEXT:    vle8.v v8, (a0)
+; RVD-NEXT:    vzext.vf2 v10, v8
+; RVD-NEXT:    vfwcvt.f.xu.v v12, v10
+; RVD-NEXT:    vnsrl.wi v8, v12, 23
+; RVD-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; RVD-NEXT:    vnsrl.wi v10, v8, 0
+; RVD-NEXT:    li a1, 134
+; RVD-NEXT:    vrsub.vx v8, v10, a1
+; RVD-NEXT:    li a1, 8
+; RVD-NEXT:    vminu.vx v8, v8, a1
+; RVD-NEXT:    vse8.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctlz_v16i8:
 ; ZVBB:       # %bb.0:
@@ -73,175 +83,66 @@ define void @ctlz_v16i8(ptr %x, ptr %y) nounwind {
 declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)
 
 define void @ctlz_v8i16(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: ctlz_v8i16:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 5
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 3
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 1
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    li a1, 257
-; LMULMAX2-RV32I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV32I-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: ctlz_v8i16:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 5
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 3
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 1
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    li a1, 257
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV64I-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX1-LABEL: ctlz_v8i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX1-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX1-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-NEXT:    vnot.v v8, v8
-; LMULMAX1-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-NEXT:    lui a1, 5
-; LMULMAX1-NEXT:    addi a1, a1, 1365
-; LMULMAX1-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-NEXT:    lui a1, 3
-; LMULMAX1-NEXT:    addi a1, a1, 819
-; LMULMAX1-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-NEXT:    lui a1, 1
-; LMULMAX1-NEXT:    addi a1, a1, -241
-; LMULMAX1-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-NEXT:    li a1, 257
-; LMULMAX1-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: ctlz_v8i16:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV32F-NEXT:    vnsrl.wi v8, v10, 23
-; LMULMAX2-RV32F-NEXT:    li a1, 142
-; LMULMAX2-RV32F-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV32F-NEXT:    li a1, 16
-; LMULMAX2-RV32F-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX2-RV32F-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: ctlz_v8i16:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV64F-NEXT:    vnsrl.wi v8, v10, 23
-; LMULMAX2-RV64F-NEXT:    li a1, 142
-; LMULMAX2-RV64F-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV64F-NEXT:    li a1, 16
-; LMULMAX2-RV64F-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX2-RV64F-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: ctlz_v8i16:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV32D-NEXT:    vnsrl.wi v8, v10, 23
-; LMULMAX2-RV32D-NEXT:    li a1, 142
-; LMULMAX2-RV32D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    li a1, 16
-; LMULMAX2-RV32D-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: ctlz_v8i16:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV64D-NEXT:    vnsrl.wi v8, v10, 23
-; LMULMAX2-RV64D-NEXT:    li a1, 142
-; LMULMAX2-RV64D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    li a1, 16
-; LMULMAX2-RV64D-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: ctlz_v8i16:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX8-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX8-NEXT:    vnsrl.wi v8, v10, 23
-; LMULMAX8-NEXT:    li a1, 142
-; LMULMAX8-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX8-NEXT:    li a1, 16
-; LMULMAX8-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: ctlz_v8i16:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RVI-NEXT:    vle16.v v8, (a0)
+; RVI-NEXT:    vsrl.vi v9, v8, 1
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 2
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 4
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 8
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vsrl.vi v9, v8, 1
+; RVI-NEXT:    lui a1, 5
+; RVI-NEXT:    addi a1, a1, 1365
+; RVI-NEXT:    vand.vx v9, v9, a1
+; RVI-NEXT:    vsub.vv v8, v8, v9
+; RVI-NEXT:    lui a1, 3
+; RVI-NEXT:    addi a1, a1, 819
+; RVI-NEXT:    vand.vx v9, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v9, v8
+; RVI-NEXT:    vsrl.vi v9, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v9
+; RVI-NEXT:    lui a1, 1
+; RVI-NEXT:    addi a1, a1, -241
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    li a1, 257
+; RVI-NEXT:    vmul.vx v8, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 8
+; RVI-NEXT:    vse16.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: ctlz_v8i16:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RVF-NEXT:    vle16.v v8, (a0)
+; RVF-NEXT:    vfwcvt.f.xu.v v10, v8
+; RVF-NEXT:    vnsrl.wi v8, v10, 23
+; RVF-NEXT:    li a1, 142
+; RVF-NEXT:    vrsub.vx v8, v8, a1
+; RVF-NEXT:    li a1, 16
+; RVF-NEXT:    vminu.vx v8, v8, a1
+; RVF-NEXT:    vse16.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: ctlz_v8i16:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RVD-NEXT:    vle16.v v8, (a0)
+; RVD-NEXT:    vfwcvt.f.xu.v v10, v8
+; RVD-NEXT:    vnsrl.wi v8, v10, 23
+; RVD-NEXT:    li a1, 142
+; RVD-NEXT:    vrsub.vx v8, v8, a1
+; RVD-NEXT:    li a1, 16
+; RVD-NEXT:    vminu.vx v8, v8, a1
+; RVD-NEXT:    vse16.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctlz_v8i16:
 ; ZVBB:       # %bb.0:
@@ -259,153 +160,72 @@ define void @ctlz_v8i16(ptr %x, ptr %y) nounwind {
 declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1)
 
 define void @ctlz_v4i32(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: ctlz_v4i32:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 16
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV32I-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: ctlz_v4i32:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 16
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV64I-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: ctlz_v4i32:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32F-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV32F-NEXT:    fsrm a1
-; LMULMAX2-RV32F-NEXT:    vsrl.vi v8, v8, 23
-; LMULMAX2-RV32F-NEXT:    li a1, 158
-; LMULMAX2-RV32F-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV32F-NEXT:    li a1, 32
-; LMULMAX2-RV32F-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX2-RV32F-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: ctlz_v4i32:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64F-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV64F-NEXT:    fsrm a1
-; LMULMAX2-RV64F-NEXT:    vsrl.vi v8, v8, 23
-; LMULMAX2-RV64F-NEXT:    li a1, 158
-; LMULMAX2-RV64F-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV64F-NEXT:    li a1, 32
-; LMULMAX2-RV64F-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX2-RV64F-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: ctlz_v4i32:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV32D-NEXT:    li a1, 52
-; LMULMAX2-RV32D-NEXT:    vnsrl.wx v8, v10, a1
-; LMULMAX2-RV32D-NEXT:    li a1, 1054
-; LMULMAX2-RV32D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    li a1, 32
-; LMULMAX2-RV32D-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: ctlz_v4i32:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV64D-NEXT:    li a1, 52
-; LMULMAX2-RV64D-NEXT:    vnsrl.wx v8, v10, a1
-; LMULMAX2-RV64D-NEXT:    li a1, 1054
-; LMULMAX2-RV64D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    li a1, 32
-; LMULMAX2-RV64D-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: ctlz_v4i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX8-NEXT:    li a1, 52
-; LMULMAX8-NEXT:    vnsrl.wx v8, v10, a1
-; LMULMAX8-NEXT:    li a1, 1054
-; LMULMAX8-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX8-NEXT:    li a1, 32
-; LMULMAX8-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: ctlz_v4i32:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RVI-NEXT:    vle32.v v8, (a0)
+; RVI-NEXT:    vsrl.vi v9, v8, 1
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 2
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 4
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 8
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 16
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vsrl.vi v9, v8, 1
+; RVI-NEXT:    lui a1, 349525
+; RVI-NEXT:    addi a1, a1, 1365
+; RVI-NEXT:    vand.vx v9, v9, a1
+; RVI-NEXT:    vsub.vv v8, v8, v9
+; RVI-NEXT:    lui a1, 209715
+; RVI-NEXT:    addi a1, a1, 819
+; RVI-NEXT:    vand.vx v9, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v9, v8
+; RVI-NEXT:    vsrl.vi v9, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v9
+; RVI-NEXT:    lui a1, 61681
+; RVI-NEXT:    addi a1, a1, -241
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    lui a1, 4112
+; RVI-NEXT:    addi a1, a1, 257
+; RVI-NEXT:    vmul.vx v8, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 24
+; RVI-NEXT:    vse32.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: ctlz_v4i32:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RVF-NEXT:    vle32.v v8, (a0)
+; RVF-NEXT:    fsrmi a1, 1
+; RVF-NEXT:    vfcvt.f.xu.v v8, v8
+; RVF-NEXT:    fsrm a1
+; RVF-NEXT:    vsrl.vi v8, v8, 23
+; RVF-NEXT:    li a1, 158
+; RVF-NEXT:    vrsub.vx v8, v8, a1
+; RVF-NEXT:    li a1, 32
+; RVF-NEXT:    vminu.vx v8, v8, a1
+; RVF-NEXT:    vse32.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: ctlz_v4i32:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RVD-NEXT:    vle32.v v8, (a0)
+; RVD-NEXT:    vfwcvt.f.xu.v v10, v8
+; RVD-NEXT:    li a1, 52
+; RVD-NEXT:    vnsrl.wx v8, v10, a1
+; RVD-NEXT:    li a1, 1054
+; RVD-NEXT:    vrsub.vx v8, v8, a1
+; RVD-NEXT:    li a1, 32
+; RVD-NEXT:    vminu.vx v8, v8, a1
+; RVD-NEXT:    vse32.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctlz_v4i32:
 ; ZVBB:       # %bb.0:
@@ -423,192 +243,160 @@ define void @ctlz_v4i32(ptr %x, ptr %y) nounwind {
 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
 
 define void @ctlz_v2i64(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: ctlz_v2i64:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 16
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    li a1, 32
-; LMULMAX2-RV32I-NEXT:    vsrl.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v9, v9, v10
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v10, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    li a1, 56
-; LMULMAX2-RV32I-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: ctlz_v2i64:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 16
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    li a1, 32
-; LMULMAX2-RV64I-NEXT:    vsrl.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 257
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    li a1, 56
-; LMULMAX2-RV64I-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: ctlz_v2i64:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    li a1, 190
-; LMULMAX2-RV32F-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV32F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX2-RV32F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32F-NEXT:    vfncvt.f.xu.w v10, v8
-; LMULMAX2-RV32F-NEXT:    fsrm a1
-; LMULMAX2-RV32F-NEXT:    vsrl.vi v8, v10, 23
-; LMULMAX2-RV32F-NEXT:    vwsubu.wv v9, v9, v8
-; LMULMAX2-RV32F-NEXT:    li a1, 64
-; LMULMAX2-RV32F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; LMULMAX2-RV32F-NEXT:    vminu.vx v8, v9, a1
-; LMULMAX2-RV32F-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: ctlz_v2i64:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    li a1, 190
-; LMULMAX2-RV64F-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV64F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64F-NEXT:    vfncvt.f.xu.w v10, v8
-; LMULMAX2-RV64F-NEXT:    fsrm a1
-; LMULMAX2-RV64F-NEXT:    vsrl.vi v8, v10, 23
-; LMULMAX2-RV64F-NEXT:    vwsubu.vv v10, v9, v8
-; LMULMAX2-RV64F-NEXT:    li a1, 64
-; LMULMAX2-RV64F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; LMULMAX2-RV64F-NEXT:    vminu.vx v8, v10, a1
-; LMULMAX2-RV64F-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: ctlz_v2i64:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32D-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV32D-NEXT:    fsrm a1
-; LMULMAX2-RV32D-NEXT:    li a1, 52
-; LMULMAX2-RV32D-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    li a1, 1086
-; LMULMAX2-RV32D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    li a1, 64
-; LMULMAX2-RV32D-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: ctlz_v2i64:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64D-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV64D-NEXT:    fsrm a1
-; LMULMAX2-RV64D-NEXT:    li a1, 52
-; LMULMAX2-RV64D-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    li a1, 1086
-; LMULMAX2-RV64D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    li a1, 64
-; LMULMAX2-RV64D-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: ctlz_v2i64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX8-NEXT:    vle64.v v8, (a0)
-; LMULMAX8-NEXT:    fsrmi a1, 1
-; LMULMAX8-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX8-NEXT:    fsrm a1
-; LMULMAX8-NEXT:    li a1, 52
-; LMULMAX8-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX8-NEXT:    li a1, 1086
-; LMULMAX8-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX8-NEXT:    li a1, 64
-; LMULMAX8-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse64.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RV32I-LABEL: ctlz_v2i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vle64.v v8, (a0)
+; RV32I-NEXT:    vsrl.vi v9, v8, 1
+; RV32I-NEXT:    vor.vv v8, v8, v9
+; RV32I-NEXT:    vsrl.vi v9, v8, 2
+; RV32I-NEXT:    vor.vv v8, v8, v9
+; RV32I-NEXT:    vsrl.vi v9, v8, 4
+; RV32I-NEXT:    vor.vv v8, v8, v9
+; RV32I-NEXT:    vsrl.vi v9, v8, 8
+; RV32I-NEXT:    vor.vv v8, v8, v9
+; RV32I-NEXT:    vsrl.vi v9, v8, 16
+; RV32I-NEXT:    vor.vv v8, v8, v9
+; RV32I-NEXT:    li a1, 32
+; RV32I-NEXT:    vsrl.vx v9, v8, a1
+; RV32I-NEXT:    vor.vv v8, v8, v9
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vsrl.vi v9, v8, 1
+; RV32I-NEXT:    lui a1, 349525
+; RV32I-NEXT:    addi a1, a1, 1365
+; RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32I-NEXT:    vmv.v.x v10, a1
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vand.vv v9, v9, v10
+; RV32I-NEXT:    vsub.vv v8, v8, v9
+; RV32I-NEXT:    lui a1, 209715
+; RV32I-NEXT:    addi a1, a1, 819
+; RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32I-NEXT:    vmv.v.x v9, a1
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vand.vv v10, v8, v9
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v9
+; RV32I-NEXT:    vadd.vv v8, v10, v8
+; RV32I-NEXT:    vsrl.vi v9, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v9
+; RV32I-NEXT:    lui a1, 61681
+; RV32I-NEXT:    addi a1, a1, -241
+; RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32I-NEXT:    vmv.v.x v9, a1
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vand.vv v8, v8, v9
+; RV32I-NEXT:    lui a1, 4112
+; RV32I-NEXT:    addi a1, a1, 257
+; RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32I-NEXT:    vmv.v.x v9, a1
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vmul.vv v8, v8, v9
+; RV32I-NEXT:    li a1, 56
+; RV32I-NEXT:    vsrl.vx v8, v8, a1
+; RV32I-NEXT:    vse64.v v8, (a0)
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: ctlz_v2i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64I-NEXT:    vle64.v v8, (a0)
+; RV64I-NEXT:    vsrl.vi v9, v8, 1
+; RV64I-NEXT:    vor.vv v8, v8, v9
+; RV64I-NEXT:    vsrl.vi v9, v8, 2
+; RV64I-NEXT:    vor.vv v8, v8, v9
+; RV64I-NEXT:    vsrl.vi v9, v8, 4
+; RV64I-NEXT:    vor.vv v8, v8, v9
+; RV64I-NEXT:    vsrl.vi v9, v8, 8
+; RV64I-NEXT:    vor.vv v8, v8, v9
+; RV64I-NEXT:    vsrl.vi v9, v8, 16
+; RV64I-NEXT:    vor.vv v8, v8, v9
+; RV64I-NEXT:    li a1, 32
+; RV64I-NEXT:    vsrl.vx v9, v8, a1
+; RV64I-NEXT:    vor.vv v8, v8, v9
+; RV64I-NEXT:    vnot.v v8, v8
+; RV64I-NEXT:    vsrl.vi v9, v8, 1
+; RV64I-NEXT:    lui a1, 349525
+; RV64I-NEXT:    addiw a1, a1, 1365
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v9, v9, a1
+; RV64I-NEXT:    vsub.vv v8, v8, v9
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v9, v8, a1
+; RV64I-NEXT:    vsrl.vi v8, v8, 2
+; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    vadd.vv v8, v9, v8
+; RV64I-NEXT:    vsrl.vi v9, v8, 4
+; RV64I-NEXT:    vadd.vv v8, v8, v9
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vmul.vx v8, v8, a1
+; RV64I-NEXT:    li a1, 56
+; RV64I-NEXT:    vsrl.vx v8, v8, a1
+; RV64I-NEXT:    vse64.v v8, (a0)
+; RV64I-NEXT:    ret
+;
+; RV32F-LABEL: ctlz_v2i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32F-NEXT:    vle64.v v8, (a0)
+; RV32F-NEXT:    li a1, 190
+; RV32F-NEXT:    vmv.v.x v9, a1
+; RV32F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV32F-NEXT:    fsrmi a1, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v10, v8
+; RV32F-NEXT:    fsrm a1
+; RV32F-NEXT:    vsrl.vi v8, v10, 23
+; RV32F-NEXT:    vwsubu.wv v9, v9, v8
+; RV32F-NEXT:    li a1, 64
+; RV32F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; RV32F-NEXT:    vminu.vx v8, v9, a1
+; RV32F-NEXT:    vse64.v v8, (a0)
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: ctlz_v2i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV64F-NEXT:    vle64.v v8, (a0)
+; RV64F-NEXT:    li a1, 190
+; RV64F-NEXT:    vmv.v.x v9, a1
+; RV64F-NEXT:    fsrmi a1, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v10, v8
+; RV64F-NEXT:    fsrm a1
+; RV64F-NEXT:    vsrl.vi v8, v10, 23
+; RV64F-NEXT:    vwsubu.vv v10, v9, v8
+; RV64F-NEXT:    li a1, 64
+; RV64F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; RV64F-NEXT:    vminu.vx v8, v10, a1
+; RV64F-NEXT:    vse64.v v8, (a0)
+; RV64F-NEXT:    ret
+;
+; RVD-LABEL: ctlz_v2i64:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RVD-NEXT:    vle64.v v8, (a0)
+; RVD-NEXT:    fsrmi a1, 1
+; RVD-NEXT:    vfcvt.f.xu.v v8, v8
+; RVD-NEXT:    fsrm a1
+; RVD-NEXT:    li a1, 52
+; RVD-NEXT:    vsrl.vx v8, v8, a1
+; RVD-NEXT:    li a1, 1086
+; RVD-NEXT:    vrsub.vx v8, v8, a1
+; RVD-NEXT:    li a1, 64
+; RVD-NEXT:    vminu.vx v8, v8, a1
+; RVD-NEXT:    vse64.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctlz_v2i64:
 ; ZVBB:       # %bb.0:
@@ -626,95 +414,66 @@ define void @ctlz_v2i64(ptr %x, ptr %y) nounwind {
 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
 
 define void @ctlz_v32i8(ptr %x, ptr %y) nounwind {
-; LMULMAX2-LABEL: ctlz_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a1, 32
-; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vnot.v v8, v8
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-NEXT:    li a1, 85
-; LMULMAX2-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    li a1, 51
-; LMULMAX2-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    vand.vi v8, v8, 15
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ctlz_v32i8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle8.v v8, (a1)
-; LMULMAX1-NEXT:    vle8.v v9, (a0)
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX1-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-NEXT:    vnot.v v8, v8
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-NEXT:    li a2, 85
-; LMULMAX1-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-NEXT:    li a3, 51
-; LMULMAX1-NEXT:    vand.vx v10, v8, a3
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-NEXT:    vand.vi v8, v8, 15
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 2
-; LMULMAX1-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-NEXT:    vnot.v v9, v9
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vx v10, v9, a3
-; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vi v9, v9, 15
-; LMULMAX1-NEXT:    vse8.v v9, (a0)
-; LMULMAX1-NEXT:    vse8.v v8, (a1)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: ctlz_v32i8:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a1, 32
-; LMULMAX8-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; LMULMAX8-NEXT:    vle8.v v8, (a0)
-; LMULMAX8-NEXT:    vzext.vf2 v12, v8
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v16, v12
-; LMULMAX8-NEXT:    vnsrl.wi v8, v16, 23
-; LMULMAX8-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; LMULMAX8-NEXT:    vnsrl.wi v12, v8, 0
-; LMULMAX8-NEXT:    li a1, 134
-; LMULMAX8-NEXT:    vrsub.vx v8, v12, a1
-; LMULMAX8-NEXT:    li a1, 8
-; LMULMAX8-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse8.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: ctlz_v32i8:
+; RVI:       # %bb.0:
+; RVI-NEXT:    li a1, 32
+; RVI-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RVI-NEXT:    vle8.v v8, (a0)
+; RVI-NEXT:    vsrl.vi v10, v8, 1
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 2
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 4
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vsrl.vi v10, v8, 1
+; RVI-NEXT:    li a1, 85
+; RVI-NEXT:    vand.vx v10, v10, a1
+; RVI-NEXT:    vsub.vv v8, v8, v10
+; RVI-NEXT:    li a1, 51
+; RVI-NEXT:    vand.vx v10, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v10, v8
+; RVI-NEXT:    vsrl.vi v10, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v10
+; RVI-NEXT:    vand.vi v8, v8, 15
+; RVI-NEXT:    vse8.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: ctlz_v32i8:
+; RVF:       # %bb.0:
+; RVF-NEXT:    li a1, 32
+; RVF-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; RVF-NEXT:    vle8.v v8, (a0)
+; RVF-NEXT:    vzext.vf2 v12, v8
+; RVF-NEXT:    vfwcvt.f.xu.v v16, v12
+; RVF-NEXT:    vnsrl.wi v8, v16, 23
+; RVF-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; RVF-NEXT:    vnsrl.wi v12, v8, 0
+; RVF-NEXT:    li a1, 134
+; RVF-NEXT:    vrsub.vx v8, v12, a1
+; RVF-NEXT:    li a1, 8
+; RVF-NEXT:    vminu.vx v8, v8, a1
+; RVF-NEXT:    vse8.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: ctlz_v32i8:
+; RVD:       # %bb.0:
+; RVD-NEXT:    li a1, 32
+; RVD-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; RVD-NEXT:    vle8.v v8, (a0)
+; RVD-NEXT:    vzext.vf2 v12, v8
+; RVD-NEXT:    vfwcvt.f.xu.v v16, v12
+; RVD-NEXT:    vnsrl.wi v8, v16, 23
+; RVD-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; RVD-NEXT:    vnsrl.wi v12, v8, 0
+; RVD-NEXT:    li a1, 134
+; RVD-NEXT:    vrsub.vx v8, v12, a1
+; RVD-NEXT:    li a1, 8
+; RVD-NEXT:    vminu.vx v8, v8, a1
+; RVD-NEXT:    vse8.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctlz_v32i8:
 ; ZVBB:       # %bb.0:
@@ -733,112 +492,66 @@ define void @ctlz_v32i8(ptr %x, ptr %y) nounwind {
 declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)
 
 define void @ctlz_v16i16(ptr %x, ptr %y) nounwind {
-; LMULMAX2-LABEL: ctlz_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vnot.v v8, v8
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-NEXT:    lui a1, 5
-; LMULMAX2-NEXT:    addi a1, a1, 1365
-; LMULMAX2-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    lui a1, 3
-; LMULMAX2-NEXT:    addi a1, a1, 819
-; LMULMAX2-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    lui a1, 1
-; LMULMAX2-NEXT:    addi a1, a1, -241
-; LMULMAX2-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-NEXT:    li a1, 257
-; LMULMAX2-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ctlz_v16i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX1-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX1-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-NEXT:    vnot.v v8, v8
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-NEXT:    lui a2, 5
-; LMULMAX1-NEXT:    addi a2, a2, 1365
-; LMULMAX1-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-NEXT:    lui a3, 3
-; LMULMAX1-NEXT:    addi a3, a3, 819
-; LMULMAX1-NEXT:    vand.vx v10, v8, a3
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-NEXT:    lui a4, 1
-; LMULMAX1-NEXT:    addi a4, a4, -241
-; LMULMAX1-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-NEXT:    li a5, 257
-; LMULMAX1-NEXT:    vmul.vx v8, v8, a5
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 2
-; LMULMAX1-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 8
-; LMULMAX1-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-NEXT:    vnot.v v9, v9
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vx v10, v9, a3
-; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-NEXT:    vmul.vx v9, v9, a5
-; LMULMAX1-NEXT:    vsrl.vi v9, v9, 8
-; LMULMAX1-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: ctlz_v16i16:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX8-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v12, v8
-; LMULMAX8-NEXT:    vnsrl.wi v8, v12, 23
-; LMULMAX8-NEXT:    li a1, 142
-; LMULMAX8-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX8-NEXT:    li a1, 16
-; LMULMAX8-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: ctlz_v16i16:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RVI-NEXT:    vle16.v v8, (a0)
+; RVI-NEXT:    vsrl.vi v10, v8, 1
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 2
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 4
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 8
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vsrl.vi v10, v8, 1
+; RVI-NEXT:    lui a1, 5
+; RVI-NEXT:    addi a1, a1, 1365
+; RVI-NEXT:    vand.vx v10, v10, a1
+; RVI-NEXT:    vsub.vv v8, v8, v10
+; RVI-NEXT:    lui a1, 3
+; RVI-NEXT:    addi a1, a1, 819
+; RVI-NEXT:    vand.vx v10, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v10, v8
+; RVI-NEXT:    vsrl.vi v10, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v10
+; RVI-NEXT:    lui a1, 1
+; RVI-NEXT:    addi a1, a1, -241
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    li a1, 257
+; RVI-NEXT:    vmul.vx v8, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 8
+; RVI-NEXT:    vse16.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: ctlz_v16i16:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RVF-NEXT:    vle16.v v8, (a0)
+; RVF-NEXT:    vfwcvt.f.xu.v v12, v8
+; RVF-NEXT:    vnsrl.wi v8, v12, 23
+; RVF-NEXT:    li a1, 142
+; RVF-NEXT:    vrsub.vx v8, v8, a1
+; RVF-NEXT:    li a1, 16
+; RVF-NEXT:    vminu.vx v8, v8, a1
+; RVF-NEXT:    vse16.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: ctlz_v16i16:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RVD-NEXT:    vle16.v v8, (a0)
+; RVD-NEXT:    vfwcvt.f.xu.v v12, v8
+; RVD-NEXT:    vnsrl.wi v8, v12, 23
+; RVD-NEXT:    li a1, 142
+; RVD-NEXT:    vrsub.vx v8, v8, a1
+; RVD-NEXT:    li a1, 16
+; RVD-NEXT:    vminu.vx v8, v8, a1
+; RVD-NEXT:    vse16.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctlz_v16i16:
 ; ZVBB:       # %bb.0:
@@ -856,155 +569,72 @@ define void @ctlz_v16i16(ptr %x, ptr %y) nounwind {
 declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
 
 define void @ctlz_v8i32(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: ctlz_v8i32:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 16
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV32I-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: ctlz_v8i32:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 16
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV64I-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: ctlz_v8i32:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32F-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV32F-NEXT:    fsrm a1
-; LMULMAX2-RV32F-NEXT:    vsrl.vi v8, v8, 23
-; LMULMAX2-RV32F-NEXT:    li a1, 158
-; LMULMAX2-RV32F-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV32F-NEXT:    li a1, 32
-; LMULMAX2-RV32F-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX2-RV32F-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: ctlz_v8i32:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64F-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV64F-NEXT:    fsrm a1
-; LMULMAX2-RV64F-NEXT:    vsrl.vi v8, v8, 23
-; LMULMAX2-RV64F-NEXT:    li a1, 158
-; LMULMAX2-RV64F-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV64F-NEXT:    li a1, 32
-; LMULMAX2-RV64F-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX2-RV64F-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: ctlz_v8i32:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32D-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV32D-NEXT:    fsrm a1
-; LMULMAX2-RV32D-NEXT:    vsrl.vi v8, v8, 23
-; LMULMAX2-RV32D-NEXT:    li a1, 158
-; LMULMAX2-RV32D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    li a1, 32
-; LMULMAX2-RV32D-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: ctlz_v8i32:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64D-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV64D-NEXT:    fsrm a1
-; LMULMAX2-RV64D-NEXT:    vsrl.vi v8, v8, 23
-; LMULMAX2-RV64D-NEXT:    li a1, 158
-; LMULMAX2-RV64D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    li a1, 32
-; LMULMAX2-RV64D-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: ctlz_v8i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v12, v8
-; LMULMAX8-NEXT:    li a1, 52
-; LMULMAX8-NEXT:    vnsrl.wx v8, v12, a1
-; LMULMAX8-NEXT:    li a1, 1054
-; LMULMAX8-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX8-NEXT:    li a1, 32
-; LMULMAX8-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: ctlz_v8i32:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RVI-NEXT:    vle32.v v8, (a0)
+; RVI-NEXT:    vsrl.vi v10, v8, 1
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 2
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 4
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 8
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 16
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vsrl.vi v10, v8, 1
+; RVI-NEXT:    lui a1, 349525
+; RVI-NEXT:    addi a1, a1, 1365
+; RVI-NEXT:    vand.vx v10, v10, a1
+; RVI-NEXT:    vsub.vv v8, v8, v10
+; RVI-NEXT:    lui a1, 209715
+; RVI-NEXT:    addi a1, a1, 819
+; RVI-NEXT:    vand.vx v10, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v10, v8
+; RVI-NEXT:    vsrl.vi v10, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v10
+; RVI-NEXT:    lui a1, 61681
+; RVI-NEXT:    addi a1, a1, -241
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    lui a1, 4112
+; RVI-NEXT:    addi a1, a1, 257
+; RVI-NEXT:    vmul.vx v8, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 24
+; RVI-NEXT:    vse32.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: ctlz_v8i32:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RVF-NEXT:    vle32.v v8, (a0)
+; RVF-NEXT:    fsrmi a1, 1
+; RVF-NEXT:    vfcvt.f.xu.v v8, v8
+; RVF-NEXT:    fsrm a1
+; RVF-NEXT:    vsrl.vi v8, v8, 23
+; RVF-NEXT:    li a1, 158
+; RVF-NEXT:    vrsub.vx v8, v8, a1
+; RVF-NEXT:    li a1, 32
+; RVF-NEXT:    vminu.vx v8, v8, a1
+; RVF-NEXT:    vse32.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: ctlz_v8i32:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RVD-NEXT:    vle32.v v8, (a0)
+; RVD-NEXT:    vfwcvt.f.xu.v v12, v8
+; RVD-NEXT:    li a1, 52
+; RVD-NEXT:    vnsrl.wx v8, v12, a1
+; RVD-NEXT:    li a1, 1054
+; RVD-NEXT:    vrsub.vx v8, v8, a1
+; RVD-NEXT:    li a1, 32
+; RVD-NEXT:    vminu.vx v8, v8, a1
+; RVD-NEXT:    vse32.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctlz_v8i32:
 ; ZVBB:       # %bb.0:
@@ -1022,192 +652,160 @@ define void @ctlz_v8i32(ptr %x, ptr %y) nounwind {
 declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
 
 define void @ctlz_v4i64(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: ctlz_v4i64:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 16
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    li a1, 32
-; LMULMAX2-RV32I-NEXT:    vsrl.vx v10, v8, a1
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v12, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v10, v10, v12
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v12, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v12, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    li a1, 56
-; LMULMAX2-RV32I-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: ctlz_v4i64:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 16
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    li a1, 32
-; LMULMAX2-RV64I-NEXT:    vsrl.vx v10, v8, a1
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 257
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    li a1, 56
-; LMULMAX2-RV64I-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: ctlz_v4i64:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    li a1, 190
-; LMULMAX2-RV32F-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; LMULMAX2-RV32F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32F-NEXT:    vfncvt.f.xu.w v12, v8
-; LMULMAX2-RV32F-NEXT:    fsrm a1
-; LMULMAX2-RV32F-NEXT:    vsrl.vi v8, v12, 23
-; LMULMAX2-RV32F-NEXT:    vwsubu.wv v10, v10, v8
-; LMULMAX2-RV32F-NEXT:    li a1, 64
-; LMULMAX2-RV32F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; LMULMAX2-RV32F-NEXT:    vminu.vx v8, v10, a1
-; LMULMAX2-RV32F-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: ctlz_v4i64:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    li a1, 190
-; LMULMAX2-RV64F-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV64F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64F-NEXT:    vfncvt.f.xu.w v11, v8
-; LMULMAX2-RV64F-NEXT:    fsrm a1
-; LMULMAX2-RV64F-NEXT:    vsrl.vi v8, v11, 23
-; LMULMAX2-RV64F-NEXT:    vwsubu.vv v12, v10, v8
-; LMULMAX2-RV64F-NEXT:    li a1, 64
-; LMULMAX2-RV64F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; LMULMAX2-RV64F-NEXT:    vminu.vx v8, v12, a1
-; LMULMAX2-RV64F-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: ctlz_v4i64:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32D-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV32D-NEXT:    fsrm a1
-; LMULMAX2-RV32D-NEXT:    li a1, 52
-; LMULMAX2-RV32D-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    li a1, 1086
-; LMULMAX2-RV32D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    li a1, 64
-; LMULMAX2-RV32D-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: ctlz_v4i64:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64D-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV64D-NEXT:    fsrm a1
-; LMULMAX2-RV64D-NEXT:    li a1, 52
-; LMULMAX2-RV64D-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    li a1, 1086
-; LMULMAX2-RV64D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    li a1, 64
-; LMULMAX2-RV64D-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: ctlz_v4i64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-NEXT:    vle64.v v8, (a0)
-; LMULMAX8-NEXT:    fsrmi a1, 1
-; LMULMAX8-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX8-NEXT:    fsrm a1
-; LMULMAX8-NEXT:    li a1, 52
-; LMULMAX8-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX8-NEXT:    li a1, 1086
-; LMULMAX8-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX8-NEXT:    li a1, 64
-; LMULMAX8-NEXT:    vminu.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse64.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RV32I-LABEL: ctlz_v4i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vle64.v v8, (a0)
+; RV32I-NEXT:    vsrl.vi v10, v8, 1
+; RV32I-NEXT:    vor.vv v8, v8, v10
+; RV32I-NEXT:    vsrl.vi v10, v8, 2
+; RV32I-NEXT:    vor.vv v8, v8, v10
+; RV32I-NEXT:    vsrl.vi v10, v8, 4
+; RV32I-NEXT:    vor.vv v8, v8, v10
+; RV32I-NEXT:    vsrl.vi v10, v8, 8
+; RV32I-NEXT:    vor.vv v8, v8, v10
+; RV32I-NEXT:    vsrl.vi v10, v8, 16
+; RV32I-NEXT:    vor.vv v8, v8, v10
+; RV32I-NEXT:    li a1, 32
+; RV32I-NEXT:    vsrl.vx v10, v8, a1
+; RV32I-NEXT:    vor.vv v8, v8, v10
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vsrl.vi v10, v8, 1
+; RV32I-NEXT:    lui a1, 349525
+; RV32I-NEXT:    addi a1, a1, 1365
+; RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32I-NEXT:    vmv.v.x v12, a1
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vand.vv v10, v10, v12
+; RV32I-NEXT:    vsub.vv v8, v8, v10
+; RV32I-NEXT:    lui a1, 209715
+; RV32I-NEXT:    addi a1, a1, 819
+; RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32I-NEXT:    vmv.v.x v10, a1
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vand.vv v12, v8, v10
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v10
+; RV32I-NEXT:    vadd.vv v8, v12, v8
+; RV32I-NEXT:    vsrl.vi v10, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v10
+; RV32I-NEXT:    lui a1, 61681
+; RV32I-NEXT:    addi a1, a1, -241
+; RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32I-NEXT:    vmv.v.x v10, a1
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vand.vv v8, v8, v10
+; RV32I-NEXT:    lui a1, 4112
+; RV32I-NEXT:    addi a1, a1, 257
+; RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32I-NEXT:    vmv.v.x v10, a1
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vmul.vv v8, v8, v10
+; RV32I-NEXT:    li a1, 56
+; RV32I-NEXT:    vsrl.vx v8, v8, a1
+; RV32I-NEXT:    vse64.v v8, (a0)
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: ctlz_v4i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64I-NEXT:    vle64.v v8, (a0)
+; RV64I-NEXT:    vsrl.vi v10, v8, 1
+; RV64I-NEXT:    vor.vv v8, v8, v10
+; RV64I-NEXT:    vsrl.vi v10, v8, 2
+; RV64I-NEXT:    vor.vv v8, v8, v10
+; RV64I-NEXT:    vsrl.vi v10, v8, 4
+; RV64I-NEXT:    vor.vv v8, v8, v10
+; RV64I-NEXT:    vsrl.vi v10, v8, 8
+; RV64I-NEXT:    vor.vv v8, v8, v10
+; RV64I-NEXT:    vsrl.vi v10, v8, 16
+; RV64I-NEXT:    vor.vv v8, v8, v10
+; RV64I-NEXT:    li a1, 32
+; RV64I-NEXT:    vsrl.vx v10, v8, a1
+; RV64I-NEXT:    vor.vv v8, v8, v10
+; RV64I-NEXT:    vnot.v v8, v8
+; RV64I-NEXT:    vsrl.vi v10, v8, 1
+; RV64I-NEXT:    lui a1, 349525
+; RV64I-NEXT:    addiw a1, a1, 1365
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v10, v10, a1
+; RV64I-NEXT:    vsub.vv v8, v8, v10
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v10, v8, a1
+; RV64I-NEXT:    vsrl.vi v8, v8, 2
+; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    vadd.vv v8, v10, v8
+; RV64I-NEXT:    vsrl.vi v10, v8, 4
+; RV64I-NEXT:    vadd.vv v8, v8, v10
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vmul.vx v8, v8, a1
+; RV64I-NEXT:    li a1, 56
+; RV64I-NEXT:    vsrl.vx v8, v8, a1
+; RV64I-NEXT:    vse64.v v8, (a0)
+; RV64I-NEXT:    ret
+;
+; RV32F-LABEL: ctlz_v4i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32F-NEXT:    vle64.v v8, (a0)
+; RV32F-NEXT:    li a1, 190
+; RV32F-NEXT:    vmv.v.x v10, a1
+; RV32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; RV32F-NEXT:    fsrmi a1, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v12, v8
+; RV32F-NEXT:    fsrm a1
+; RV32F-NEXT:    vsrl.vi v8, v12, 23
+; RV32F-NEXT:    vwsubu.wv v10, v10, v8
+; RV32F-NEXT:    li a1, 64
+; RV32F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; RV32F-NEXT:    vminu.vx v8, v10, a1
+; RV32F-NEXT:    vse64.v v8, (a0)
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: ctlz_v4i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64F-NEXT:    vle64.v v8, (a0)
+; RV64F-NEXT:    li a1, 190
+; RV64F-NEXT:    vmv.v.x v10, a1
+; RV64F-NEXT:    fsrmi a1, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v11, v8
+; RV64F-NEXT:    fsrm a1
+; RV64F-NEXT:    vsrl.vi v8, v11, 23
+; RV64F-NEXT:    vwsubu.vv v12, v10, v8
+; RV64F-NEXT:    li a1, 64
+; RV64F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; RV64F-NEXT:    vminu.vx v8, v12, a1
+; RV64F-NEXT:    vse64.v v8, (a0)
+; RV64F-NEXT:    ret
+;
+; RVD-LABEL: ctlz_v4i64:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RVD-NEXT:    vle64.v v8, (a0)
+; RVD-NEXT:    fsrmi a1, 1
+; RVD-NEXT:    vfcvt.f.xu.v v8, v8
+; RVD-NEXT:    fsrm a1
+; RVD-NEXT:    li a1, 52
+; RVD-NEXT:    vsrl.vx v8, v8, a1
+; RVD-NEXT:    li a1, 1086
+; RVD-NEXT:    vrsub.vx v8, v8, a1
+; RVD-NEXT:    li a1, 64
+; RVD-NEXT:    vminu.vx v8, v8, a1
+; RVD-NEXT:    vse64.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctlz_v4i64:
 ; ZVBB:       # %bb.0:
@@ -1225,45 +823,59 @@ define void @ctlz_v4i64(ptr %x, ptr %y) nounwind {
 declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
 
 define void @ctlz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: ctlz_zero_undef_v16i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vsrl.vi v9, v8, 2
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vsrl.vi v9, v8, 4
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vnot.v v8, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    li a1, 85
-; CHECK-NEXT:    vand.vx v9, v9, a1
-; CHECK-NEXT:    vsub.vv v8, v8, v9
-; CHECK-NEXT:    li a1, 51
-; CHECK-NEXT:    vand.vx v9, v8, a1
-; CHECK-NEXT:    vsrl.vi v8, v8, 2
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vadd.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 4
-; CHECK-NEXT:    vadd.vv v8, v8, v9
-; CHECK-NEXT:    vand.vi v8, v8, 15
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    ret
-;
-; LMULMAX8-LABEL: ctlz_zero_undef_v16i8:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX8-NEXT:    vle8.v v8, (a0)
-; LMULMAX8-NEXT:    vzext.vf2 v10, v8
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v12, v10
-; LMULMAX8-NEXT:    vnsrl.wi v8, v12, 23
-; LMULMAX8-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; LMULMAX8-NEXT:    vnsrl.wi v10, v8, 0
-; LMULMAX8-NEXT:    li a1, 134
-; LMULMAX8-NEXT:    vrsub.vx v8, v10, a1
-; LMULMAX8-NEXT:    vse8.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: ctlz_zero_undef_v16i8:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RVI-NEXT:    vle8.v v8, (a0)
+; RVI-NEXT:    vsrl.vi v9, v8, 1
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 2
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 4
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vsrl.vi v9, v8, 1
+; RVI-NEXT:    li a1, 85
+; RVI-NEXT:    vand.vx v9, v9, a1
+; RVI-NEXT:    vsub.vv v8, v8, v9
+; RVI-NEXT:    li a1, 51
+; RVI-NEXT:    vand.vx v9, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v9, v8
+; RVI-NEXT:    vsrl.vi v9, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v9
+; RVI-NEXT:    vand.vi v8, v8, 15
+; RVI-NEXT:    vse8.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: ctlz_zero_undef_v16i8:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RVF-NEXT:    vle8.v v8, (a0)
+; RVF-NEXT:    vzext.vf2 v10, v8
+; RVF-NEXT:    vfwcvt.f.xu.v v12, v10
+; RVF-NEXT:    vnsrl.wi v8, v12, 23
+; RVF-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; RVF-NEXT:    vnsrl.wi v10, v8, 0
+; RVF-NEXT:    li a1, 134
+; RVF-NEXT:    vrsub.vx v8, v10, a1
+; RVF-NEXT:    vse8.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: ctlz_zero_undef_v16i8:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RVD-NEXT:    vle8.v v8, (a0)
+; RVD-NEXT:    vzext.vf2 v10, v8
+; RVD-NEXT:    vfwcvt.f.xu.v v12, v10
+; RVD-NEXT:    vnsrl.wi v8, v12, 23
+; RVD-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; RVD-NEXT:    vnsrl.wi v10, v8, 0
+; RVD-NEXT:    li a1, 134
+; RVD-NEXT:    vrsub.vx v8, v10, a1
+; RVD-NEXT:    vse8.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctlz_zero_undef_v16i8:
 ; ZVBB:       # %bb.0:
@@ -1280,165 +892,62 @@ define void @ctlz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
 }
 
 define void @ctlz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: ctlz_zero_undef_v8i16:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 5
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 3
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 1
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    li a1, 257
-; LMULMAX2-RV32I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV32I-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: ctlz_zero_undef_v8i16:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 5
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 3
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 1
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    li a1, 257
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV64I-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX1-LABEL: ctlz_zero_undef_v8i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX1-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX1-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-NEXT:    vnot.v v8, v8
-; LMULMAX1-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-NEXT:    lui a1, 5
-; LMULMAX1-NEXT:    addi a1, a1, 1365
-; LMULMAX1-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-NEXT:    lui a1, 3
-; LMULMAX1-NEXT:    addi a1, a1, 819
-; LMULMAX1-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-NEXT:    lui a1, 1
-; LMULMAX1-NEXT:    addi a1, a1, -241
-; LMULMAX1-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-NEXT:    li a1, 257
-; LMULMAX1-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: ctlz_zero_undef_v8i16:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV32F-NEXT:    vnsrl.wi v8, v10, 23
-; LMULMAX2-RV32F-NEXT:    li a1, 142
-; LMULMAX2-RV32F-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV32F-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: ctlz_zero_undef_v8i16:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV64F-NEXT:    vnsrl.wi v8, v10, 23
-; LMULMAX2-RV64F-NEXT:    li a1, 142
-; LMULMAX2-RV64F-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV64F-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: ctlz_zero_undef_v8i16:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV32D-NEXT:    vnsrl.wi v8, v10, 23
-; LMULMAX2-RV32D-NEXT:    li a1, 142
-; LMULMAX2-RV32D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: ctlz_zero_undef_v8i16:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV64D-NEXT:    vnsrl.wi v8, v10, 23
-; LMULMAX2-RV64D-NEXT:    li a1, 142
-; LMULMAX2-RV64D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: ctlz_zero_undef_v8i16:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX8-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX8-NEXT:    vnsrl.wi v8, v10, 23
-; LMULMAX8-NEXT:    li a1, 142
-; LMULMAX8-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: ctlz_zero_undef_v8i16:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RVI-NEXT:    vle16.v v8, (a0)
+; RVI-NEXT:    vsrl.vi v9, v8, 1
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 2
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 4
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 8
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vsrl.vi v9, v8, 1
+; RVI-NEXT:    lui a1, 5
+; RVI-NEXT:    addi a1, a1, 1365
+; RVI-NEXT:    vand.vx v9, v9, a1
+; RVI-NEXT:    vsub.vv v8, v8, v9
+; RVI-NEXT:    lui a1, 3
+; RVI-NEXT:    addi a1, a1, 819
+; RVI-NEXT:    vand.vx v9, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v9, v8
+; RVI-NEXT:    vsrl.vi v9, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v9
+; RVI-NEXT:    lui a1, 1
+; RVI-NEXT:    addi a1, a1, -241
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    li a1, 257
+; RVI-NEXT:    vmul.vx v8, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 8
+; RVI-NEXT:    vse16.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: ctlz_zero_undef_v8i16:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RVF-NEXT:    vle16.v v8, (a0)
+; RVF-NEXT:    vfwcvt.f.xu.v v10, v8
+; RVF-NEXT:    vnsrl.wi v8, v10, 23
+; RVF-NEXT:    li a1, 142
+; RVF-NEXT:    vrsub.vx v8, v8, a1
+; RVF-NEXT:    vse16.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: ctlz_zero_undef_v8i16:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RVD-NEXT:    vle16.v v8, (a0)
+; RVD-NEXT:    vfwcvt.f.xu.v v10, v8
+; RVD-NEXT:    vnsrl.wi v8, v10, 23
+; RVD-NEXT:    li a1, 142
+; RVD-NEXT:    vrsub.vx v8, v8, a1
+; RVD-NEXT:    vse16.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctlz_zero_undef_v8i16:
 ; ZVBB:       # %bb.0:
@@ -1455,143 +964,68 @@ define void @ctlz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
 }
 
 define void @ctlz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: ctlz_zero_undef_v4i32:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 16
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV32I-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: ctlz_zero_undef_v4i32:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 16
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV64I-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: ctlz_zero_undef_v4i32:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32F-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV32F-NEXT:    fsrm a1
-; LMULMAX2-RV32F-NEXT:    vsrl.vi v8, v8, 23
-; LMULMAX2-RV32F-NEXT:    li a1, 158
-; LMULMAX2-RV32F-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV32F-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: ctlz_zero_undef_v4i32:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64F-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV64F-NEXT:    fsrm a1
-; LMULMAX2-RV64F-NEXT:    vsrl.vi v8, v8, 23
-; LMULMAX2-RV64F-NEXT:    li a1, 158
-; LMULMAX2-RV64F-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV64F-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: ctlz_zero_undef_v4i32:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV32D-NEXT:    li a1, 52
-; LMULMAX2-RV32D-NEXT:    vnsrl.wx v8, v10, a1
-; LMULMAX2-RV32D-NEXT:    li a1, 1054
-; LMULMAX2-RV32D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: ctlz_zero_undef_v4i32:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV64D-NEXT:    li a1, 52
-; LMULMAX2-RV64D-NEXT:    vnsrl.wx v8, v10, a1
-; LMULMAX2-RV64D-NEXT:    li a1, 1054
-; LMULMAX2-RV64D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: ctlz_zero_undef_v4i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX8-NEXT:    li a1, 52
-; LMULMAX8-NEXT:    vnsrl.wx v8, v10, a1
-; LMULMAX8-NEXT:    li a1, 1054
-; LMULMAX8-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: ctlz_zero_undef_v4i32:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RVI-NEXT:    vle32.v v8, (a0)
+; RVI-NEXT:    vsrl.vi v9, v8, 1
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 2
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 4
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 8
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 16
+; RVI-NEXT:    vor.vv v8, v8, v9
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vsrl.vi v9, v8, 1
+; RVI-NEXT:    lui a1, 349525
+; RVI-NEXT:    addi a1, a1, 1365
+; RVI-NEXT:    vand.vx v9, v9, a1
+; RVI-NEXT:    vsub.vv v8, v8, v9
+; RVI-NEXT:    lui a1, 209715
+; RVI-NEXT:    addi a1, a1, 819
+; RVI-NEXT:    vand.vx v9, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v9, v8
+; RVI-NEXT:    vsrl.vi v9, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v9
+; RVI-NEXT:    lui a1, 61681
+; RVI-NEXT:    addi a1, a1, -241
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    lui a1, 4112
+; RVI-NEXT:    addi a1, a1, 257
+; RVI-NEXT:    vmul.vx v8, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 24
+; RVI-NEXT:    vse32.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: ctlz_zero_undef_v4i32:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RVF-NEXT:    vle32.v v8, (a0)
+; RVF-NEXT:    fsrmi a1, 1
+; RVF-NEXT:    vfcvt.f.xu.v v8, v8
+; RVF-NEXT:    fsrm a1
+; RVF-NEXT:    vsrl.vi v8, v8, 23
+; RVF-NEXT:    li a1, 158
+; RVF-NEXT:    vrsub.vx v8, v8, a1
+; RVF-NEXT:    vse32.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: ctlz_zero_undef_v4i32:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RVD-NEXT:    vle32.v v8, (a0)
+; RVD-NEXT:    vfwcvt.f.xu.v v10, v8
+; RVD-NEXT:    li a1, 52
+; RVD-NEXT:    vnsrl.wx v8, v10, a1
+; RVD-NEXT:    li a1, 1054
+; RVD-NEXT:    vrsub.vx v8, v8, a1
+; RVD-NEXT:    vse32.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctlz_zero_undef_v4i32:
 ; ZVBB:       # %bb.0:
@@ -1608,180 +1042,152 @@ define void @ctlz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
 }
 
 define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: ctlz_zero_undef_v2i64:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 16
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    li a1, 32
-; LMULMAX2-RV32I-NEXT:    vsrl.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v9, v9, v10
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v10, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    li a1, 56
-; LMULMAX2-RV32I-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: ctlz_zero_undef_v2i64:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 16
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    li a1, 32
-; LMULMAX2-RV64I-NEXT:    vsrl.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 257
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    li a1, 56
-; LMULMAX2-RV64I-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: ctlz_zero_undef_v2i64:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    li a1, 190
-; LMULMAX2-RV32F-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV32F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX2-RV32F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32F-NEXT:    vfncvt.f.xu.w v10, v8
-; LMULMAX2-RV32F-NEXT:    fsrm a1
-; LMULMAX2-RV32F-NEXT:    vsrl.vi v8, v10, 23
-; LMULMAX2-RV32F-NEXT:    vwsubu.wv v9, v9, v8
-; LMULMAX2-RV32F-NEXT:    vse64.v v9, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: ctlz_zero_undef_v2i64:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    li a1, 190
-; LMULMAX2-RV64F-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV64F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64F-NEXT:    vfncvt.f.xu.w v10, v8
-; LMULMAX2-RV64F-NEXT:    fsrm a1
-; LMULMAX2-RV64F-NEXT:    vsrl.vi v8, v10, 23
-; LMULMAX2-RV64F-NEXT:    vwsubu.vv v10, v9, v8
-; LMULMAX2-RV64F-NEXT:    vse64.v v10, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: ctlz_zero_undef_v2i64:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32D-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV32D-NEXT:    fsrm a1
-; LMULMAX2-RV32D-NEXT:    li a1, 52
-; LMULMAX2-RV32D-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    li a1, 1086
-; LMULMAX2-RV32D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: ctlz_zero_undef_v2i64:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64D-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV64D-NEXT:    fsrm a1
-; LMULMAX2-RV64D-NEXT:    li a1, 52
-; LMULMAX2-RV64D-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    li a1, 1086
-; LMULMAX2-RV64D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: ctlz_zero_undef_v2i64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX8-NEXT:    vle64.v v8, (a0)
-; LMULMAX8-NEXT:    fsrmi a1, 1
-; LMULMAX8-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX8-NEXT:    fsrm a1
-; LMULMAX8-NEXT:    li a1, 52
-; LMULMAX8-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX8-NEXT:    li a1, 1086
-; LMULMAX8-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse64.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RV32I-LABEL: ctlz_zero_undef_v2i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vle64.v v8, (a0)
+; RV32I-NEXT:    vsrl.vi v9, v8, 1
+; RV32I-NEXT:    vor.vv v8, v8, v9
+; RV32I-NEXT:    vsrl.vi v9, v8, 2
+; RV32I-NEXT:    vor.vv v8, v8, v9
+; RV32I-NEXT:    vsrl.vi v9, v8, 4
+; RV32I-NEXT:    vor.vv v8, v8, v9
+; RV32I-NEXT:    vsrl.vi v9, v8, 8
+; RV32I-NEXT:    vor.vv v8, v8, v9
+; RV32I-NEXT:    vsrl.vi v9, v8, 16
+; RV32I-NEXT:    vor.vv v8, v8, v9
+; RV32I-NEXT:    li a1, 32
+; RV32I-NEXT:    vsrl.vx v9, v8, a1
+; RV32I-NEXT:    vor.vv v8, v8, v9
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vsrl.vi v9, v8, 1
+; RV32I-NEXT:    lui a1, 349525
+; RV32I-NEXT:    addi a1, a1, 1365
+; RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32I-NEXT:    vmv.v.x v10, a1
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vand.vv v9, v9, v10
+; RV32I-NEXT:    vsub.vv v8, v8, v9
+; RV32I-NEXT:    lui a1, 209715
+; RV32I-NEXT:    addi a1, a1, 819
+; RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32I-NEXT:    vmv.v.x v9, a1
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vand.vv v10, v8, v9
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v9
+; RV32I-NEXT:    vadd.vv v8, v10, v8
+; RV32I-NEXT:    vsrl.vi v9, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v9
+; RV32I-NEXT:    lui a1, 61681
+; RV32I-NEXT:    addi a1, a1, -241
+; RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32I-NEXT:    vmv.v.x v9, a1
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vand.vv v8, v8, v9
+; RV32I-NEXT:    lui a1, 4112
+; RV32I-NEXT:    addi a1, a1, 257
+; RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32I-NEXT:    vmv.v.x v9, a1
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vmul.vv v8, v8, v9
+; RV32I-NEXT:    li a1, 56
+; RV32I-NEXT:    vsrl.vx v8, v8, a1
+; RV32I-NEXT:    vse64.v v8, (a0)
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: ctlz_zero_undef_v2i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64I-NEXT:    vle64.v v8, (a0)
+; RV64I-NEXT:    vsrl.vi v9, v8, 1
+; RV64I-NEXT:    vor.vv v8, v8, v9
+; RV64I-NEXT:    vsrl.vi v9, v8, 2
+; RV64I-NEXT:    vor.vv v8, v8, v9
+; RV64I-NEXT:    vsrl.vi v9, v8, 4
+; RV64I-NEXT:    vor.vv v8, v8, v9
+; RV64I-NEXT:    vsrl.vi v9, v8, 8
+; RV64I-NEXT:    vor.vv v8, v8, v9
+; RV64I-NEXT:    vsrl.vi v9, v8, 16
+; RV64I-NEXT:    vor.vv v8, v8, v9
+; RV64I-NEXT:    li a1, 32
+; RV64I-NEXT:    vsrl.vx v9, v8, a1
+; RV64I-NEXT:    vor.vv v8, v8, v9
+; RV64I-NEXT:    vnot.v v8, v8
+; RV64I-NEXT:    vsrl.vi v9, v8, 1
+; RV64I-NEXT:    lui a1, 349525
+; RV64I-NEXT:    addiw a1, a1, 1365
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v9, v9, a1
+; RV64I-NEXT:    vsub.vv v8, v8, v9
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v9, v8, a1
+; RV64I-NEXT:    vsrl.vi v8, v8, 2
+; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    vadd.vv v8, v9, v8
+; RV64I-NEXT:    vsrl.vi v9, v8, 4
+; RV64I-NEXT:    vadd.vv v8, v8, v9
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vmul.vx v8, v8, a1
+; RV64I-NEXT:    li a1, 56
+; RV64I-NEXT:    vsrl.vx v8, v8, a1
+; RV64I-NEXT:    vse64.v v8, (a0)
+; RV64I-NEXT:    ret
+;
+; RV32F-LABEL: ctlz_zero_undef_v2i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32F-NEXT:    vle64.v v8, (a0)
+; RV32F-NEXT:    li a1, 190
+; RV32F-NEXT:    vmv.v.x v9, a1
+; RV32F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV32F-NEXT:    fsrmi a1, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v10, v8
+; RV32F-NEXT:    fsrm a1
+; RV32F-NEXT:    vsrl.vi v8, v10, 23
+; RV32F-NEXT:    vwsubu.wv v9, v9, v8
+; RV32F-NEXT:    vse64.v v9, (a0)
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: ctlz_zero_undef_v2i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV64F-NEXT:    vle64.v v8, (a0)
+; RV64F-NEXT:    li a1, 190
+; RV64F-NEXT:    vmv.v.x v9, a1
+; RV64F-NEXT:    fsrmi a1, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v10, v8
+; RV64F-NEXT:    fsrm a1
+; RV64F-NEXT:    vsrl.vi v8, v10, 23
+; RV64F-NEXT:    vwsubu.vv v10, v9, v8
+; RV64F-NEXT:    vse64.v v10, (a0)
+; RV64F-NEXT:    ret
+;
+; RVD-LABEL: ctlz_zero_undef_v2i64:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RVD-NEXT:    vle64.v v8, (a0)
+; RVD-NEXT:    fsrmi a1, 1
+; RVD-NEXT:    vfcvt.f.xu.v v8, v8
+; RVD-NEXT:    fsrm a1
+; RVD-NEXT:    li a1, 52
+; RVD-NEXT:    vsrl.vx v8, v8, a1
+; RVD-NEXT:    li a1, 1086
+; RVD-NEXT:    vrsub.vx v8, v8, a1
+; RVD-NEXT:    vse64.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctlz_zero_undef_v2i64:
 ; ZVBB:       # %bb.0:
@@ -1798,93 +1204,62 @@ define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
 }
 
 define void @ctlz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
-; LMULMAX2-LABEL: ctlz_zero_undef_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a1, 32
-; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vnot.v v8, v8
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-NEXT:    li a1, 85
-; LMULMAX2-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    li a1, 51
-; LMULMAX2-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    vand.vi v8, v8, 15
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ctlz_zero_undef_v32i8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle8.v v8, (a1)
-; LMULMAX1-NEXT:    vle8.v v9, (a0)
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX1-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-NEXT:    vnot.v v8, v8
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-NEXT:    li a2, 85
-; LMULMAX1-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-NEXT:    li a3, 51
-; LMULMAX1-NEXT:    vand.vx v10, v8, a3
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-NEXT:    vand.vi v8, v8, 15
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 2
-; LMULMAX1-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-NEXT:    vnot.v v9, v9
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vx v10, v9, a3
-; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vi v9, v9, 15
-; LMULMAX1-NEXT:    vse8.v v9, (a0)
-; LMULMAX1-NEXT:    vse8.v v8, (a1)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: ctlz_zero_undef_v32i8:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a1, 32
-; LMULMAX8-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; LMULMAX8-NEXT:    vle8.v v8, (a0)
-; LMULMAX8-NEXT:    vzext.vf2 v12, v8
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v16, v12
-; LMULMAX8-NEXT:    vnsrl.wi v8, v16, 23
-; LMULMAX8-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; LMULMAX8-NEXT:    vnsrl.wi v12, v8, 0
-; LMULMAX8-NEXT:    li a1, 134
-; LMULMAX8-NEXT:    vrsub.vx v8, v12, a1
-; LMULMAX8-NEXT:    vse8.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: ctlz_zero_undef_v32i8:
+; RVI:       # %bb.0:
+; RVI-NEXT:    li a1, 32
+; RVI-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RVI-NEXT:    vle8.v v8, (a0)
+; RVI-NEXT:    vsrl.vi v10, v8, 1
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 2
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 4
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vsrl.vi v10, v8, 1
+; RVI-NEXT:    li a1, 85
+; RVI-NEXT:    vand.vx v10, v10, a1
+; RVI-NEXT:    vsub.vv v8, v8, v10
+; RVI-NEXT:    li a1, 51
+; RVI-NEXT:    vand.vx v10, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v10, v8
+; RVI-NEXT:    vsrl.vi v10, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v10
+; RVI-NEXT:    vand.vi v8, v8, 15
+; RVI-NEXT:    vse8.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: ctlz_zero_undef_v32i8:
+; RVF:       # %bb.0:
+; RVF-NEXT:    li a1, 32
+; RVF-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; RVF-NEXT:    vle8.v v8, (a0)
+; RVF-NEXT:    vzext.vf2 v12, v8
+; RVF-NEXT:    vfwcvt.f.xu.v v16, v12
+; RVF-NEXT:    vnsrl.wi v8, v16, 23
+; RVF-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; RVF-NEXT:    vnsrl.wi v12, v8, 0
+; RVF-NEXT:    li a1, 134
+; RVF-NEXT:    vrsub.vx v8, v12, a1
+; RVF-NEXT:    vse8.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: ctlz_zero_undef_v32i8:
+; RVD:       # %bb.0:
+; RVD-NEXT:    li a1, 32
+; RVD-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; RVD-NEXT:    vle8.v v8, (a0)
+; RVD-NEXT:    vzext.vf2 v12, v8
+; RVD-NEXT:    vfwcvt.f.xu.v v16, v12
+; RVD-NEXT:    vnsrl.wi v8, v16, 23
+; RVD-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; RVD-NEXT:    vnsrl.wi v12, v8, 0
+; RVD-NEXT:    li a1, 134
+; RVD-NEXT:    vrsub.vx v8, v12, a1
+; RVD-NEXT:    vse8.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctlz_zero_undef_v32i8:
 ; ZVBB:       # %bb.0:
@@ -1902,110 +1277,62 @@ define void @ctlz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
 }
 
 define void @ctlz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
-; LMULMAX2-LABEL: ctlz_zero_undef_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vnot.v v8, v8
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-NEXT:    lui a1, 5
-; LMULMAX2-NEXT:    addi a1, a1, 1365
-; LMULMAX2-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    lui a1, 3
-; LMULMAX2-NEXT:    addi a1, a1, 819
-; LMULMAX2-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    lui a1, 1
-; LMULMAX2-NEXT:    addi a1, a1, -241
-; LMULMAX2-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-NEXT:    li a1, 257
-; LMULMAX2-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ctlz_zero_undef_v16i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX1-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX1-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-NEXT:    vnot.v v8, v8
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-NEXT:    lui a2, 5
-; LMULMAX1-NEXT:    addi a2, a2, 1365
-; LMULMAX1-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-NEXT:    lui a3, 3
-; LMULMAX1-NEXT:    addi a3, a3, 819
-; LMULMAX1-NEXT:    vand.vx v10, v8, a3
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-NEXT:    lui a4, 1
-; LMULMAX1-NEXT:    addi a4, a4, -241
-; LMULMAX1-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-NEXT:    li a5, 257
-; LMULMAX1-NEXT:    vmul.vx v8, v8, a5
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 2
-; LMULMAX1-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 8
-; LMULMAX1-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-NEXT:    vnot.v v9, v9
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vx v10, v9, a3
-; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-NEXT:    vmul.vx v9, v9, a5
-; LMULMAX1-NEXT:    vsrl.vi v9, v9, 8
-; LMULMAX1-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: ctlz_zero_undef_v16i16:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX8-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v12, v8
-; LMULMAX8-NEXT:    vnsrl.wi v8, v12, 23
-; LMULMAX8-NEXT:    li a1, 142
-; LMULMAX8-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: ctlz_zero_undef_v16i16:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RVI-NEXT:    vle16.v v8, (a0)
+; RVI-NEXT:    vsrl.vi v10, v8, 1
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 2
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 4
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 8
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vsrl.vi v10, v8, 1
+; RVI-NEXT:    lui a1, 5
+; RVI-NEXT:    addi a1, a1, 1365
+; RVI-NEXT:    vand.vx v10, v10, a1
+; RVI-NEXT:    vsub.vv v8, v8, v10
+; RVI-NEXT:    lui a1, 3
+; RVI-NEXT:    addi a1, a1, 819
+; RVI-NEXT:    vand.vx v10, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v10, v8
+; RVI-NEXT:    vsrl.vi v10, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v10
+; RVI-NEXT:    lui a1, 1
+; RVI-NEXT:    addi a1, a1, -241
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    li a1, 257
+; RVI-NEXT:    vmul.vx v8, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 8
+; RVI-NEXT:    vse16.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: ctlz_zero_undef_v16i16:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RVF-NEXT:    vle16.v v8, (a0)
+; RVF-NEXT:    vfwcvt.f.xu.v v12, v8
+; RVF-NEXT:    vnsrl.wi v8, v12, 23
+; RVF-NEXT:    li a1, 142
+; RVF-NEXT:    vrsub.vx v8, v8, a1
+; RVF-NEXT:    vse16.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: ctlz_zero_undef_v16i16:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RVD-NEXT:    vle16.v v8, (a0)
+; RVD-NEXT:    vfwcvt.f.xu.v v12, v8
+; RVD-NEXT:    vnsrl.wi v8, v12, 23
+; RVD-NEXT:    li a1, 142
+; RVD-NEXT:    vrsub.vx v8, v8, a1
+; RVD-NEXT:    vse16.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctlz_zero_undef_v16i16:
 ; ZVBB:       # %bb.0:
@@ -2022,145 +1349,68 @@ define void @ctlz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
 }
 
 define void @ctlz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: ctlz_zero_undef_v8i32:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 16
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV32I-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: ctlz_zero_undef_v8i32:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 16
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV64I-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: ctlz_zero_undef_v8i32:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32F-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV32F-NEXT:    fsrm a1
-; LMULMAX2-RV32F-NEXT:    vsrl.vi v8, v8, 23
-; LMULMAX2-RV32F-NEXT:    li a1, 158
-; LMULMAX2-RV32F-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV32F-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: ctlz_zero_undef_v8i32:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64F-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV64F-NEXT:    fsrm a1
-; LMULMAX2-RV64F-NEXT:    vsrl.vi v8, v8, 23
-; LMULMAX2-RV64F-NEXT:    li a1, 158
-; LMULMAX2-RV64F-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV64F-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: ctlz_zero_undef_v8i32:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32D-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV32D-NEXT:    fsrm a1
-; LMULMAX2-RV32D-NEXT:    vsrl.vi v8, v8, 23
-; LMULMAX2-RV32D-NEXT:    li a1, 158
-; LMULMAX2-RV32D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: ctlz_zero_undef_v8i32:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64D-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV64D-NEXT:    fsrm a1
-; LMULMAX2-RV64D-NEXT:    vsrl.vi v8, v8, 23
-; LMULMAX2-RV64D-NEXT:    li a1, 158
-; LMULMAX2-RV64D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: ctlz_zero_undef_v8i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v12, v8
-; LMULMAX8-NEXT:    li a1, 52
-; LMULMAX8-NEXT:    vnsrl.wx v8, v12, a1
-; LMULMAX8-NEXT:    li a1, 1054
-; LMULMAX8-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: ctlz_zero_undef_v8i32:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RVI-NEXT:    vle32.v v8, (a0)
+; RVI-NEXT:    vsrl.vi v10, v8, 1
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 2
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 4
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 8
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 16
+; RVI-NEXT:    vor.vv v8, v8, v10
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vsrl.vi v10, v8, 1
+; RVI-NEXT:    lui a1, 349525
+; RVI-NEXT:    addi a1, a1, 1365
+; RVI-NEXT:    vand.vx v10, v10, a1
+; RVI-NEXT:    vsub.vv v8, v8, v10
+; RVI-NEXT:    lui a1, 209715
+; RVI-NEXT:    addi a1, a1, 819
+; RVI-NEXT:    vand.vx v10, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v10, v8
+; RVI-NEXT:    vsrl.vi v10, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v10
+; RVI-NEXT:    lui a1, 61681
+; RVI-NEXT:    addi a1, a1, -241
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    lui a1, 4112
+; RVI-NEXT:    addi a1, a1, 257
+; RVI-NEXT:    vmul.vx v8, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 24
+; RVI-NEXT:    vse32.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: ctlz_zero_undef_v8i32:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RVF-NEXT:    vle32.v v8, (a0)
+; RVF-NEXT:    fsrmi a1, 1
+; RVF-NEXT:    vfcvt.f.xu.v v8, v8
+; RVF-NEXT:    fsrm a1
+; RVF-NEXT:    vsrl.vi v8, v8, 23
+; RVF-NEXT:    li a1, 158
+; RVF-NEXT:    vrsub.vx v8, v8, a1
+; RVF-NEXT:    vse32.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: ctlz_zero_undef_v8i32:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RVD-NEXT:    vle32.v v8, (a0)
+; RVD-NEXT:    vfwcvt.f.xu.v v12, v8
+; RVD-NEXT:    li a1, 52
+; RVD-NEXT:    vnsrl.wx v8, v12, a1
+; RVD-NEXT:    li a1, 1054
+; RVD-NEXT:    vrsub.vx v8, v8, a1
+; RVD-NEXT:    vse32.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctlz_zero_undef_v8i32:
 ; ZVBB:       # %bb.0:
@@ -2177,180 +1427,152 @@ define void @ctlz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
 }
 
 define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: ctlz_zero_undef_v4i64:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 16
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    li a1, 32
-; LMULMAX2-RV32I-NEXT:    vsrl.vx v10, v8, a1
-; LMULMAX2-RV32I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v12, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v10, v10, v12
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v12, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v12, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    li a1, 56
-; LMULMAX2-RV32I-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: ctlz_zero_undef_v4i64:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 8
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 16
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    li a1, 32
-; LMULMAX2-RV64I-NEXT:    vsrl.vx v10, v8, a1
-; LMULMAX2-RV64I-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 257
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    li a1, 56
-; LMULMAX2-RV64I-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: ctlz_zero_undef_v4i64:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    li a1, 190
-; LMULMAX2-RV32F-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; LMULMAX2-RV32F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32F-NEXT:    vfncvt.f.xu.w v12, v8
-; LMULMAX2-RV32F-NEXT:    fsrm a1
-; LMULMAX2-RV32F-NEXT:    vsrl.vi v8, v12, 23
-; LMULMAX2-RV32F-NEXT:    vwsubu.wv v10, v10, v8
-; LMULMAX2-RV32F-NEXT:    vse64.v v10, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: ctlz_zero_undef_v4i64:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    li a1, 190
-; LMULMAX2-RV64F-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV64F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64F-NEXT:    vfncvt.f.xu.w v11, v8
-; LMULMAX2-RV64F-NEXT:    fsrm a1
-; LMULMAX2-RV64F-NEXT:    vsrl.vi v8, v11, 23
-; LMULMAX2-RV64F-NEXT:    vwsubu.vv v12, v10, v8
-; LMULMAX2-RV64F-NEXT:    vse64.v v12, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: ctlz_zero_undef_v4i64:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32D-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV32D-NEXT:    fsrm a1
-; LMULMAX2-RV32D-NEXT:    li a1, 52
-; LMULMAX2-RV32D-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    li a1, 1086
-; LMULMAX2-RV32D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: ctlz_zero_undef_v4i64:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64D-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV64D-NEXT:    fsrm a1
-; LMULMAX2-RV64D-NEXT:    li a1, 52
-; LMULMAX2-RV64D-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    li a1, 1086
-; LMULMAX2-RV64D-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: ctlz_zero_undef_v4i64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-NEXT:    vle64.v v8, (a0)
-; LMULMAX8-NEXT:    fsrmi a1, 1
-; LMULMAX8-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX8-NEXT:    fsrm a1
-; LMULMAX8-NEXT:    li a1, 52
-; LMULMAX8-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX8-NEXT:    li a1, 1086
-; LMULMAX8-NEXT:    vrsub.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse64.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RV32I-LABEL: ctlz_zero_undef_v4i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vle64.v v8, (a0)
+; RV32I-NEXT:    vsrl.vi v10, v8, 1
+; RV32I-NEXT:    vor.vv v8, v8, v10
+; RV32I-NEXT:    vsrl.vi v10, v8, 2
+; RV32I-NEXT:    vor.vv v8, v8, v10
+; RV32I-NEXT:    vsrl.vi v10, v8, 4
+; RV32I-NEXT:    vor.vv v8, v8, v10
+; RV32I-NEXT:    vsrl.vi v10, v8, 8
+; RV32I-NEXT:    vor.vv v8, v8, v10
+; RV32I-NEXT:    vsrl.vi v10, v8, 16
+; RV32I-NEXT:    vor.vv v8, v8, v10
+; RV32I-NEXT:    li a1, 32
+; RV32I-NEXT:    vsrl.vx v10, v8, a1
+; RV32I-NEXT:    vor.vv v8, v8, v10
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vsrl.vi v10, v8, 1
+; RV32I-NEXT:    lui a1, 349525
+; RV32I-NEXT:    addi a1, a1, 1365
+; RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32I-NEXT:    vmv.v.x v12, a1
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vand.vv v10, v10, v12
+; RV32I-NEXT:    vsub.vv v8, v8, v10
+; RV32I-NEXT:    lui a1, 209715
+; RV32I-NEXT:    addi a1, a1, 819
+; RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32I-NEXT:    vmv.v.x v10, a1
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vand.vv v12, v8, v10
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v10
+; RV32I-NEXT:    vadd.vv v8, v12, v8
+; RV32I-NEXT:    vsrl.vi v10, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v10
+; RV32I-NEXT:    lui a1, 61681
+; RV32I-NEXT:    addi a1, a1, -241
+; RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32I-NEXT:    vmv.v.x v10, a1
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vand.vv v8, v8, v10
+; RV32I-NEXT:    lui a1, 4112
+; RV32I-NEXT:    addi a1, a1, 257
+; RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32I-NEXT:    vmv.v.x v10, a1
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vmul.vv v8, v8, v10
+; RV32I-NEXT:    li a1, 56
+; RV32I-NEXT:    vsrl.vx v8, v8, a1
+; RV32I-NEXT:    vse64.v v8, (a0)
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: ctlz_zero_undef_v4i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64I-NEXT:    vle64.v v8, (a0)
+; RV64I-NEXT:    vsrl.vi v10, v8, 1
+; RV64I-NEXT:    vor.vv v8, v8, v10
+; RV64I-NEXT:    vsrl.vi v10, v8, 2
+; RV64I-NEXT:    vor.vv v8, v8, v10
+; RV64I-NEXT:    vsrl.vi v10, v8, 4
+; RV64I-NEXT:    vor.vv v8, v8, v10
+; RV64I-NEXT:    vsrl.vi v10, v8, 8
+; RV64I-NEXT:    vor.vv v8, v8, v10
+; RV64I-NEXT:    vsrl.vi v10, v8, 16
+; RV64I-NEXT:    vor.vv v8, v8, v10
+; RV64I-NEXT:    li a1, 32
+; RV64I-NEXT:    vsrl.vx v10, v8, a1
+; RV64I-NEXT:    vor.vv v8, v8, v10
+; RV64I-NEXT:    vnot.v v8, v8
+; RV64I-NEXT:    vsrl.vi v10, v8, 1
+; RV64I-NEXT:    lui a1, 349525
+; RV64I-NEXT:    addiw a1, a1, 1365
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v10, v10, a1
+; RV64I-NEXT:    vsub.vv v8, v8, v10
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v10, v8, a1
+; RV64I-NEXT:    vsrl.vi v8, v8, 2
+; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    vadd.vv v8, v10, v8
+; RV64I-NEXT:    vsrl.vi v10, v8, 4
+; RV64I-NEXT:    vadd.vv v8, v8, v10
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vmul.vx v8, v8, a1
+; RV64I-NEXT:    li a1, 56
+; RV64I-NEXT:    vsrl.vx v8, v8, a1
+; RV64I-NEXT:    vse64.v v8, (a0)
+; RV64I-NEXT:    ret
+;
+; RV32F-LABEL: ctlz_zero_undef_v4i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32F-NEXT:    vle64.v v8, (a0)
+; RV32F-NEXT:    li a1, 190
+; RV32F-NEXT:    vmv.v.x v10, a1
+; RV32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; RV32F-NEXT:    fsrmi a1, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v12, v8
+; RV32F-NEXT:    fsrm a1
+; RV32F-NEXT:    vsrl.vi v8, v12, 23
+; RV32F-NEXT:    vwsubu.wv v10, v10, v8
+; RV32F-NEXT:    vse64.v v10, (a0)
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: ctlz_zero_undef_v4i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64F-NEXT:    vle64.v v8, (a0)
+; RV64F-NEXT:    li a1, 190
+; RV64F-NEXT:    vmv.v.x v10, a1
+; RV64F-NEXT:    fsrmi a1, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v11, v8
+; RV64F-NEXT:    fsrm a1
+; RV64F-NEXT:    vsrl.vi v8, v11, 23
+; RV64F-NEXT:    vwsubu.vv v12, v10, v8
+; RV64F-NEXT:    vse64.v v12, (a0)
+; RV64F-NEXT:    ret
+;
+; RVD-LABEL: ctlz_zero_undef_v4i64:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RVD-NEXT:    vle64.v v8, (a0)
+; RVD-NEXT:    fsrmi a1, 1
+; RVD-NEXT:    vfcvt.f.xu.v v8, v8
+; RVD-NEXT:    fsrm a1
+; RVD-NEXT:    li a1, 52
+; RVD-NEXT:    vsrl.vx v8, v8, a1
+; RVD-NEXT:    li a1, 1086
+; RVD-NEXT:    vrsub.vx v8, v8, a1
+; RVD-NEXT:    vse64.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctlz_zero_undef_v4i64:
 ; ZVBB:       # %bb.0:
@@ -2366,7 +1588,5 @@ define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
   ret void
 }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; LMULMAX1-RV32: {{.*}}
-; LMULMAX1-RV64: {{.*}}
-; LMULMAX2-RV32: {{.*}}
-; LMULMAX2-RV64: {{.*}}
+; RV32D: {{.*}}
+; RV64D: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
index 5e0c99fa1f46e0..147f560633a45f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
 
@@ -127,157 +125,81 @@ define void @ctpop_v4i32(ptr %x, ptr %y) {
 declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
 
 define void @ctpop_v2i64(ptr %x, ptr %y) {
-; LMULMAX2-RV32-LABEL: ctpop_v2i64:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    lui a1, 349525
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT:    vand.vv v9, v10, v9
-; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    lui a1, 209715
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32-NEXT:    vand.vv v10, v8, v9
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    lui a1, 61681
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    lui a1, 4112
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32-NEXT:    vmul.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    li a1, 56
-; LMULMAX2-RV32-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: ctpop_v2i64:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64-NEXT:    lui a1, 349525
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT:    lui a1, 209715
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT:    lui a1, 61681
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    lui a1, 4112
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 257
-; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    li a1, 56
-; LMULMAX2-RV64-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: ctpop_v2i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    lui a1, 349525
-; LMULMAX1-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.x v9, a1
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT:    vand.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    lui a1, 209715
-; LMULMAX1-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX1-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.x v9, a1
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vand.vv v10, v8, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    lui a1, 61681
-; LMULMAX1-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX1-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.x v9, a1
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    lui a1, 4112
-; LMULMAX1-RV32-NEXT:    addi a1, a1, 257
-; LMULMAX1-RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.x v9, a1
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    li a1, 56
-; LMULMAX1-RV32-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: ctpop_v2i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT:    lui a1, 349525
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT:    slli a2, a1, 32
-; LMULMAX1-RV64-NEXT:    add a1, a1, a2
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    lui a1, 209715
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, 819
-; LMULMAX1-RV64-NEXT:    slli a2, a1, 32
-; LMULMAX1-RV64-NEXT:    add a1, a1, a2
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    lui a1, 61681
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX1-RV64-NEXT:    slli a2, a1, 32
-; LMULMAX1-RV64-NEXT:    add a1, a1, a2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    lui a1, 4112
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, 257
-; LMULMAX1-RV64-NEXT:    slli a2, a1, 32
-; LMULMAX1-RV64-NEXT:    add a1, a1, a2
-; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    li a1, 56
-; LMULMAX1-RV64-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; RV32-LABEL: ctpop_v2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vi v10, v8, 1
+; RV32-NEXT:    vand.vv v9, v10, v9
+; RV32-NEXT:    vsub.vv v8, v8, v9
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vand.vv v10, v8, v9
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v9
+; RV32-NEXT:    vadd.vv v8, v10, v8
+; RV32-NEXT:    vsrl.vi v9, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v9
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v9
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v9, a1
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vmul.vv v8, v8, v9
+; RV32-NEXT:    li a1, 56
+; RV32-NEXT:    vsrl.vx v8, v8, a1
+; RV32-NEXT:    vse64.v v8, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ctpop_v2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    vsrl.vi v9, v8, 1
+; RV64-NEXT:    lui a1, 349525
+; RV64-NEXT:    addiw a1, a1, 1365
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vand.vx v9, v9, a1
+; RV64-NEXT:    vsub.vv v8, v8, v9
+; RV64-NEXT:    lui a1, 209715
+; RV64-NEXT:    addiw a1, a1, 819
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vand.vx v9, v8, a1
+; RV64-NEXT:    vsrl.vi v8, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vadd.vv v8, v9, v8
+; RV64-NEXT:    vsrl.vi v9, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v9
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    lui a1, 4112
+; RV64-NEXT:    addiw a1, a1, 257
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vmul.vx v8, v8, a1
+; RV64-NEXT:    li a1, 56
+; RV64-NEXT:    vsrl.vx v8, v8, a1
+; RV64-NEXT:    vse64.v v8, (a0)
+; RV64-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctpop_v2i64:
 ; ZVBB:       # %bb.0:
@@ -295,57 +217,25 @@ define void @ctpop_v2i64(ptr %x, ptr %y) {
 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
 
 define void @ctpop_v32i8(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: ctpop_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a1, 32
-; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-NEXT:    li a1, 85
-; LMULMAX2-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    li a1, 51
-; LMULMAX2-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    vand.vi v8, v8, 15
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ctpop_v32i8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle8.v v8, (a1)
-; LMULMAX1-NEXT:    vle8.v v9, (a0)
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-NEXT:    li a2, 85
-; LMULMAX1-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-NEXT:    li a3, 51
-; LMULMAX1-NEXT:    vand.vx v10, v8, a3
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-NEXT:    vand.vi v8, v8, 15
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vx v10, v9, a3
-; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vi v9, v9, 15
-; LMULMAX1-NEXT:    vse8.v v9, (a0)
-; LMULMAX1-NEXT:    vse8.v v8, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ctpop_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    li a1, 85
+; CHECK-NEXT:    vand.vx v10, v10, a1
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    li a1, 51
+; CHECK-NEXT:    vand.vx v10, v8, a1
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    vand.vi v8, v8, 15
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctpop_v32i8:
 ; ZVBB:       # %bb.0:
@@ -364,72 +254,31 @@ define void @ctpop_v32i8(ptr %x, ptr %y) {
 declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
 
 define void @ctpop_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: ctpop_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-NEXT:    lui a1, 5
-; LMULMAX2-NEXT:    addi a1, a1, 1365
-; LMULMAX2-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    lui a1, 3
-; LMULMAX2-NEXT:    addi a1, a1, 819
-; LMULMAX2-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    lui a1, 1
-; LMULMAX2-NEXT:    addi a1, a1, -241
-; LMULMAX2-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-NEXT:    li a1, 257
-; LMULMAX2-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ctpop_v16i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-NEXT:    lui a2, 5
-; LMULMAX1-NEXT:    addi a2, a2, 1365
-; LMULMAX1-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-NEXT:    lui a3, 3
-; LMULMAX1-NEXT:    addi a3, a3, 819
-; LMULMAX1-NEXT:    vand.vx v10, v8, a3
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-NEXT:    lui a4, 1
-; LMULMAX1-NEXT:    addi a4, a4, -241
-; LMULMAX1-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-NEXT:    li a5, 257
-; LMULMAX1-NEXT:    vmul.vx v8, v8, a5
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vx v10, v9, a3
-; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-NEXT:    vmul.vx v9, v9, a5
-; LMULMAX1-NEXT:    vsrl.vi v9, v9, 8
-; LMULMAX1-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ctpop_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a1, 5
+; CHECK-NEXT:    addi a1, a1, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a1
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    lui a1, 3
+; CHECK-NEXT:    addi a1, a1, 819
+; CHECK-NEXT:    vand.vx v10, v8, a1
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    lui a1, 1
+; CHECK-NEXT:    addi a1, a1, -241
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    li a1, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a1
+; CHECK-NEXT:    vsrl.vi v8, v8, 8
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctpop_v16i16:
 ; ZVBB:       # %bb.0:
@@ -447,74 +296,32 @@ define void @ctpop_v16i16(ptr %x, ptr %y) {
 declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
 
 define void @ctpop_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: ctpop_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-NEXT:    lui a1, 349525
-; LMULMAX2-NEXT:    addi a1, a1, 1365
-; LMULMAX2-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    lui a1, 209715
-; LMULMAX2-NEXT:    addi a1, a1, 819
-; LMULMAX2-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    lui a1, 61681
-; LMULMAX2-NEXT:    addi a1, a1, -241
-; LMULMAX2-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-NEXT:    lui a1, 4112
-; LMULMAX2-NEXT:    addi a1, a1, 257
-; LMULMAX2-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ctpop_v8i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle32.v v8, (a1)
-; LMULMAX1-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-NEXT:    lui a2, 349525
-; LMULMAX1-NEXT:    addi a2, a2, 1365
-; LMULMAX1-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-NEXT:    lui a3, 209715
-; LMULMAX1-NEXT:    addi a3, a3, 819
-; LMULMAX1-NEXT:    vand.vx v10, v8, a3
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-NEXT:    lui a4, 61681
-; LMULMAX1-NEXT:    addi a4, a4, -241
-; LMULMAX1-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-NEXT:    lui a5, 4112
-; LMULMAX1-NEXT:    addi a5, a5, 257
-; LMULMAX1-NEXT:    vmul.vx v8, v8, a5
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vx v10, v9, a3
-; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-NEXT:    vmul.vx v9, v9, a5
-; LMULMAX1-NEXT:    vsrl.vi v9, v9, 24
-; LMULMAX1-NEXT:    vse32.v v9, (a0)
-; LMULMAX1-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ctpop_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    lui a1, 349525
+; CHECK-NEXT:    addi a1, a1, 1365
+; CHECK-NEXT:    vand.vx v10, v10, a1
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    lui a1, 209715
+; CHECK-NEXT:    addi a1, a1, 819
+; CHECK-NEXT:    vand.vx v10, v8, a1
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    lui a1, 61681
+; CHECK-NEXT:    addi a1, a1, -241
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    lui a1, 4112
+; CHECK-NEXT:    addi a1, a1, 257
+; CHECK-NEXT:    vmul.vx v8, v8, a1
+; CHECK-NEXT:    vsrl.vi v8, v8, 24
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctpop_v8i32:
 ; ZVBB:       # %bb.0:
@@ -530,38 +337,14 @@ define void @ctpop_v8i32(ptr %x, ptr %y) {
   ret void
 }
 define <8 x i1> @ctpop_v8i32_ult_two(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: ctpop_v8i32_ult_two:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
-; LMULMAX2-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ctpop_v8i32_ult_two:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-NEXT:    vadd.vi v10, v8, -1
-; LMULMAX1-NEXT:    vand.vv v8, v8, v10
-; LMULMAX1-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vadd.vi v10, v9, -1
-; LMULMAX1-NEXT:    vand.vv v9, v9, v10
-; LMULMAX1-NEXT:    vmseq.vi v0, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-NEXT:    vmerge.vim v9, v9, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
-; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ctpop_v8i32_ult_two:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vadd.vi v10, v8, -1
+; CHECK-NEXT:    vand.vv v8, v8, v10
+; CHECK-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctpop_v8i32_ult_two:
 ; ZVBB:       # %bb.0:
@@ -577,38 +360,14 @@ define <8 x i1> @ctpop_v8i32_ult_two(ptr %x, ptr %y) {
   ret <8 x i1> %cmp
 }
 define <8 x i1> @ctpop_v8i32_ugt_one(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: ctpop_v8i32_ugt_one:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
-; LMULMAX2-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ctpop_v8i32_ugt_one:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-NEXT:    vadd.vi v10, v8, -1
-; LMULMAX1-NEXT:    vand.vv v8, v8, v10
-; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vadd.vi v10, v9, -1
-; LMULMAX1-NEXT:    vand.vv v9, v9, v10
-; LMULMAX1-NEXT:    vmsne.vi v0, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-NEXT:    vmerge.vim v9, v9, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
-; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ctpop_v8i32_ugt_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vadd.vi v10, v8, -1
+; CHECK-NEXT:    vand.vv v8, v8, v10
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctpop_v8i32_ugt_one:
 ; ZVBB:       # %bb.0:
@@ -624,38 +383,14 @@ define <8 x i1> @ctpop_v8i32_ugt_one(ptr %x, ptr %y) {
   ret <8 x i1> %cmp
 }
 define <8 x i1> @ctpop_v8i32_eq_one(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: ctpop_v8i32_eq_one:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
-; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vmsltu.vv v0, v10, v8
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ctpop_v8i32_eq_one:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-NEXT:    vadd.vi v10, v8, -1
-; LMULMAX1-NEXT:    vxor.vv v8, v8, v10
-; LMULMAX1-NEXT:    vmsltu.vv v0, v10, v8
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vadd.vi v10, v9, -1
-; LMULMAX1-NEXT:    vxor.vv v9, v9, v10
-; LMULMAX1-NEXT:    vmsltu.vv v0, v10, v9
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-NEXT:    vmerge.vim v9, v9, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
-; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ctpop_v8i32_eq_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vadd.vi v10, v8, -1
+; CHECK-NEXT:    vxor.vv v8, v8, v10
+; CHECK-NEXT:    vmsltu.vv v0, v10, v8
+; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctpop_v8i32_eq_one:
 ; ZVBB:       # %bb.0:
@@ -671,38 +406,14 @@ define <8 x i1> @ctpop_v8i32_eq_one(ptr %x, ptr %y) {
   ret <8 x i1> %cmp
 }
 define <8 x i1> @ctpop_v8i32_ne_one(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: ctpop_v8i32_ne_one:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
-; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vmsleu.vv v0, v8, v10
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: ctpop_v8i32_ne_one:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-NEXT:    vadd.vi v10, v8, -1
-; LMULMAX1-NEXT:    vxor.vv v8, v8, v10
-; LMULMAX1-NEXT:    vmsleu.vv v0, v8, v10
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vadd.vi v10, v9, -1
-; LMULMAX1-NEXT:    vxor.vv v9, v9, v10
-; LMULMAX1-NEXT:    vmsleu.vv v0, v9, v10
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-NEXT:    vmerge.vim v9, v9, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
-; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ctpop_v8i32_ne_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vadd.vi v10, v8, -1
+; CHECK-NEXT:    vxor.vv v8, v8, v10
+; CHECK-NEXT:    vmsleu.vv v0, v8, v10
+; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctpop_v8i32_ne_one:
 ; ZVBB:       # %bb.0:
@@ -720,187 +431,81 @@ define <8 x i1> @ctpop_v8i32_ne_one(ptr %x, ptr %y) {
 declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
 
 define void @ctpop_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-RV32-LABEL: ctpop_v4i64:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    lui a1, 349525
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vsrl.vi v12, v8, 1
-; LMULMAX2-RV32-NEXT:    vand.vv v10, v12, v10
-; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    lui a1, 209715
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vand.vv v12, v8, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v12, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    lui a1, 61681
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    lui a1, 4112
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmul.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    li a1, 56
-; LMULMAX2-RV32-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: ctpop_v4i64:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT:    lui a1, 349525
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    lui a1, 209715
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
-; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    lui a1, 61681
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    lui a1, 4112
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 257
-; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    li a1, 56
-; LMULMAX2-RV64-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: ctpop_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    lui a2, 349525
-; LMULMAX1-RV32-NEXT:    addi a2, a2, 1365
-; LMULMAX1-RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.x v10, a2
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vsrl.vi v11, v9, 1
-; LMULMAX1-RV32-NEXT:    vand.vv v11, v11, v10
-; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v11
-; LMULMAX1-RV32-NEXT:    lui a2, 209715
-; LMULMAX1-RV32-NEXT:    addi a2, a2, 819
-; LMULMAX1-RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.x v11, a2
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vand.vv v12, v9, v11
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v11
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v12, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v12, v9, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v12
-; LMULMAX1-RV32-NEXT:    lui a2, 61681
-; LMULMAX1-RV32-NEXT:    addi a2, a2, -241
-; LMULMAX1-RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.x v12, a2
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v12
-; LMULMAX1-RV32-NEXT:    lui a2, 4112
-; LMULMAX1-RV32-NEXT:    addi a2, a2, 257
-; LMULMAX1-RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.x v13, a2
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmul.vv v9, v9, v13
-; LMULMAX1-RV32-NEXT:    li a2, 56
-; LMULMAX1-RV32-NEXT:    vsrl.vx v9, v9, a2
-; LMULMAX1-RV32-NEXT:    vsrl.vi v14, v8, 1
-; LMULMAX1-RV32-NEXT:    vand.vv v10, v14, v10
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vand.vv v10, v8, v11
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v12
-; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v13
-; LMULMAX1-RV32-NEXT:    vsrl.vx v8, v8, a2
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: ctpop_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT:    lui a2, 349525
-; LMULMAX1-RV64-NEXT:    addiw a2, a2, 1365
-; LMULMAX1-RV64-NEXT:    slli a3, a2, 32
-; LMULMAX1-RV64-NEXT:    add a2, a2, a3
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    lui a3, 209715
-; LMULMAX1-RV64-NEXT:    addiw a3, a3, 819
-; LMULMAX1-RV64-NEXT:    slli a4, a3, 32
-; LMULMAX1-RV64-NEXT:    add a3, a3, a4
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v8, a3
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    lui a4, 61681
-; LMULMAX1-RV64-NEXT:    addiw a4, a4, -241
-; LMULMAX1-RV64-NEXT:    slli a5, a4, 32
-; LMULMAX1-RV64-NEXT:    add a4, a4, a5
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT:    lui a5, 4112
-; LMULMAX1-RV64-NEXT:    addiw a5, a5, 257
-; LMULMAX1-RV64-NEXT:    slli a6, a5, 32
-; LMULMAX1-RV64-NEXT:    add a5, a5, a6
-; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT:    li a6, 56
-; LMULMAX1-RV64-NEXT:    vsrl.vx v8, v8, a6
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v9, a3
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT:    vmul.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT:    vsrl.vx v9, v9, a6
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; RV32-LABEL: ctpop_v4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vsrl.vi v12, v8, 1
+; RV32-NEXT:    vand.vv v10, v12, v10
+; RV32-NEXT:    vsub.vv v8, v8, v10
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vand.vv v12, v8, v10
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vadd.vv v8, v12, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v10
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    lui a1, 4112
+; RV32-NEXT:    addi a1, a1, 257
+; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vmul.vv v8, v8, v10
+; RV32-NEXT:    li a1, 56
+; RV32-NEXT:    vsrl.vx v8, v8, a1
+; RV32-NEXT:    vse64.v v8, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ctpop_v4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    vsrl.vi v10, v8, 1
+; RV64-NEXT:    lui a1, 349525
+; RV64-NEXT:    addiw a1, a1, 1365
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vand.vx v10, v10, a1
+; RV64-NEXT:    vsub.vv v8, v8, v10
+; RV64-NEXT:    lui a1, 209715
+; RV64-NEXT:    addiw a1, a1, 819
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vand.vx v10, v8, a1
+; RV64-NEXT:    vsrl.vi v8, v8, 2
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vadd.vv v8, v10, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v10
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    lui a1, 4112
+; RV64-NEXT:    addiw a1, a1, 257
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vmul.vx v8, v8, a1
+; RV64-NEXT:    li a1, 56
+; RV64-NEXT:    vsrl.vx v8, v8, a1
+; RV64-NEXT:    vse64.v v8, (a0)
+; RV64-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctpop_v4i64:
 ; ZVBB:       # %bb.0:
@@ -916,68 +521,14 @@ define void @ctpop_v4i64(ptr %x, ptr %y) {
   ret void
 }
 define <4 x i1> @ctpop_v4i64_ult_two(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: ctpop_v4i64_ult_two:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
-; LMULMAX2-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: ctpop_v4i64_ult_two:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v10, -1
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vadd.vv v11, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v11
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v11, 0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmseq.vv v0, v9, v11
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-RV32-NEXT:    vmerge.vim v9, v9, 1, v0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vadd.vv v10, v8, v10
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vmseq.vv v0, v8, v11
-; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-RV32-NEXT:    vslideup.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    vmsne.vi v0, v9, 0
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: ctpop_v4i64_ult_two:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vadd.vi v10, v8, -1
-; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vadd.vi v10, v9, -1
-; LMULMAX1-RV64-NEXT:    vand.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vmseq.vi v0, v9, 0
-; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-RV64-NEXT:    vmerge.vim v9, v9, 1, v0
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-RV64-NEXT:    vslideup.vi v8, v9, 2
-; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: ctpop_v4i64_ult_two:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vadd.vi v10, v8, -1
+; CHECK-NEXT:    vand.vv v8, v8, v10
+; CHECK-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctpop_v4i64_ult_two:
 ; ZVBB:       # %bb.0:
@@ -993,68 +544,14 @@ define <4 x i1> @ctpop_v4i64_ult_two(ptr %x, ptr %y) {
   ret <4 x i1> %cmp
 }
 define <4 x i1> @ctpop_v4i64_ugt_one(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: ctpop_v4i64_ugt_one:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
-; LMULMAX2-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: ctpop_v4i64_ugt_one:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v10, -1
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vadd.vv v11, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v11
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v11, 0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmsne.vv v0, v9, v11
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-RV32-NEXT:    vmerge.vim v9, v9, 1, v0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vadd.vv v10, v8, v10
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vmsne.vv v0, v8, v11
-; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-RV32-NEXT:    vslideup.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    vmsne.vi v0, v9, 0
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: ctpop_v4i64_ugt_one:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vadd.vi v10, v8, -1
-; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vadd.vi v10, v9, -1
-; LMULMAX1-RV64-NEXT:    vand.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v9, 0
-; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-RV64-NEXT:    vmerge.vim v9, v9, 1, v0
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-RV64-NEXT:    vslideup.vi v8, v9, 2
-; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: ctpop_v4i64_ugt_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vadd.vi v10, v8, -1
+; CHECK-NEXT:    vand.vv v8, v8, v10
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctpop_v4i64_ugt_one:
 ; ZVBB:       # %bb.0:
@@ -1070,65 +567,14 @@ define <4 x i1> @ctpop_v4i64_ugt_one(ptr %x, ptr %y) {
   ret <4 x i1> %cmp
 }
 define <4 x i1> @ctpop_v4i64_eq_one(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: ctpop_v4i64_eq_one:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
-; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vmsltu.vv v0, v10, v8
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: ctpop_v4i64_eq_one:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v10, -1
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vadd.vv v11, v9, v10
-; LMULMAX1-RV32-NEXT:    vxor.vv v9, v9, v11
-; LMULMAX1-RV32-NEXT:    vmsltu.vv v0, v11, v9
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-RV32-NEXT:    vmerge.vim v9, v9, 1, v0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vadd.vv v10, v8, v10
-; LMULMAX1-RV32-NEXT:    vxor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vmsltu.vv v0, v10, v8
-; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-RV32-NEXT:    vslideup.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    vmsne.vi v0, v9, 0
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: ctpop_v4i64_eq_one:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vadd.vi v10, v8, -1
-; LMULMAX1-RV64-NEXT:    vxor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vmsltu.vv v0, v10, v8
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vadd.vi v10, v9, -1
-; LMULMAX1-RV64-NEXT:    vxor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vmsltu.vv v0, v10, v9
-; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-RV64-NEXT:    vmerge.vim v9, v9, 1, v0
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-RV64-NEXT:    vslideup.vi v8, v9, 2
-; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: ctpop_v4i64_eq_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vadd.vi v10, v8, -1
+; CHECK-NEXT:    vxor.vv v8, v8, v10
+; CHECK-NEXT:    vmsltu.vv v0, v10, v8
+; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctpop_v4i64_eq_one:
 ; ZVBB:       # %bb.0:
@@ -1144,65 +590,14 @@ define <4 x i1> @ctpop_v4i64_eq_one(ptr %x, ptr %y) {
   ret <4 x i1> %cmp
 }
 define <4 x i1> @ctpop_v4i64_ne_one(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: ctpop_v4i64_ne_one:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vadd.vi v10, v8, -1
-; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vmsleu.vv v0, v8, v10
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: ctpop_v4i64_ne_one:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v10, -1
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vadd.vv v11, v9, v10
-; LMULMAX1-RV32-NEXT:    vxor.vv v9, v9, v11
-; LMULMAX1-RV32-NEXT:    vmsleu.vv v0, v9, v11
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-RV32-NEXT:    vmerge.vim v9, v9, 1, v0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vadd.vv v10, v8, v10
-; LMULMAX1-RV32-NEXT:    vxor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vmsleu.vv v0, v8, v10
-; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-RV32-NEXT:    vslideup.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    vmsne.vi v0, v9, 0
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: ctpop_v4i64_ne_one:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vadd.vi v10, v8, -1
-; LMULMAX1-RV64-NEXT:    vxor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vmsleu.vv v0, v8, v10
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vadd.vi v10, v9, -1
-; LMULMAX1-RV64-NEXT:    vxor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vmsleu.vv v0, v9, v10
-; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-RV64-NEXT:    vmerge.vim v9, v9, 1, v0
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-RV64-NEXT:    vslideup.vi v8, v9, 2
-; LMULMAX1-RV64-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: ctpop_v4i64_ne_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vadd.vi v10, v8, -1
+; CHECK-NEXT:    vxor.vv v8, v8, v10
+; CHECK-NEXT:    vmsleu.vv v0, v8, v10
+; CHECK-NEXT:    ret
 ;
 ; ZVBB-LABEL: ctpop_v4i64_ne_one:
 ; ZVBB:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
index 5afd935935e5d9..5802fba2f24545 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -1,61 +1,74 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x,+zvl128b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32I
-; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x,+zvl128b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64I
-; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x,+zvl128b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x,+zvl128b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+zvl128b,+f -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32F
-; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+zvl128b,+f -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64F
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32D
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64D
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVI,RV32I
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVI,RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+zvl128b,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVF,RV32F
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+zvl128b,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVF,RV64F
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVD,RV32D
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVD,RV64D
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
 
 define void @cttz_v16i8(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: cttz_v16i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    li a1, 1
-; CHECK-NEXT:    vsub.vx v9, v8, a1
-; CHECK-NEXT:    vnot.v v8, v8
-; CHECK-NEXT:    vand.vv v8, v8, v9
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    li a1, 85
-; CHECK-NEXT:    vand.vx v9, v9, a1
-; CHECK-NEXT:    vsub.vv v8, v8, v9
-; CHECK-NEXT:    li a1, 51
-; CHECK-NEXT:    vand.vx v9, v8, a1
-; CHECK-NEXT:    vsrl.vi v8, v8, 2
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vadd.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 4
-; CHECK-NEXT:    vadd.vv v8, v8, v9
-; CHECK-NEXT:    vand.vi v8, v8, 15
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    ret
-;
-; LMULMAX8-LABEL: cttz_v16i8:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX8-NEXT:    vle8.v v8, (a0)
-; LMULMAX8-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX8-NEXT:    vand.vv v9, v8, v9
-; LMULMAX8-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; LMULMAX8-NEXT:    vzext.vf2 v10, v9
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v12, v10
-; LMULMAX8-NEXT:    vnsrl.wi v10, v12, 23
-; LMULMAX8-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; LMULMAX8-NEXT:    vnsrl.wi v9, v10, 0
-; LMULMAX8-NEXT:    li a1, 127
-; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-NEXT:    vsub.vx v8, v9, a1
-; LMULMAX8-NEXT:    vmerge.vim v8, v8, 8, v0
-; LMULMAX8-NEXT:    vse8.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: cttz_v16i8:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RVI-NEXT:    vle8.v v8, (a0)
+; RVI-NEXT:    li a1, 1
+; RVI-NEXT:    vsub.vx v9, v8, a1
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vand.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 1
+; RVI-NEXT:    li a1, 85
+; RVI-NEXT:    vand.vx v9, v9, a1
+; RVI-NEXT:    vsub.vv v8, v8, v9
+; RVI-NEXT:    li a1, 51
+; RVI-NEXT:    vand.vx v9, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v9, v8
+; RVI-NEXT:    vsrl.vi v9, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v9
+; RVI-NEXT:    vand.vi v8, v8, 15
+; RVI-NEXT:    vse8.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: cttz_v16i8:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RVF-NEXT:    vle8.v v8, (a0)
+; RVF-NEXT:    vrsub.vi v9, v8, 0
+; RVF-NEXT:    vand.vv v9, v8, v9
+; RVF-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RVF-NEXT:    vzext.vf2 v10, v9
+; RVF-NEXT:    vfwcvt.f.xu.v v12, v10
+; RVF-NEXT:    vnsrl.wi v10, v12, 23
+; RVF-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; RVF-NEXT:    vnsrl.wi v9, v10, 0
+; RVF-NEXT:    li a1, 127
+; RVF-NEXT:    vmseq.vi v0, v8, 0
+; RVF-NEXT:    vsub.vx v8, v9, a1
+; RVF-NEXT:    vmerge.vim v8, v8, 8, v0
+; RVF-NEXT:    vse8.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: cttz_v16i8:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RVD-NEXT:    vle8.v v8, (a0)
+; RVD-NEXT:    vrsub.vi v9, v8, 0
+; RVD-NEXT:    vand.vv v9, v8, v9
+; RVD-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RVD-NEXT:    vzext.vf2 v10, v9
+; RVD-NEXT:    vfwcvt.f.xu.v v12, v10
+; RVD-NEXT:    vnsrl.wi v10, v12, 23
+; RVD-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; RVD-NEXT:    vnsrl.wi v9, v10, 0
+; RVD-NEXT:    li a1, 127
+; RVD-NEXT:    vmseq.vi v0, v8, 0
+; RVD-NEXT:    vsub.vx v8, v9, a1
+; RVD-NEXT:    vmerge.vim v8, v8, 8, v0
+; RVD-NEXT:    vse8.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: cttz_v16i8:
 ; ZVBB:       # %bb.0:
@@ -73,175 +86,67 @@ define void @cttz_v16i8(ptr %x, ptr %y) nounwind {
 declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)
 
 define void @cttz_v8i16(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: cttz_v8i16:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    li a1, 1
-; LMULMAX2-RV32I-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 5
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 3
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 1
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    li a1, 257
-; LMULMAX2-RV32I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV32I-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: cttz_v8i16:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    li a1, 1
-; LMULMAX2-RV64I-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 5
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 3
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 1
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    li a1, 257
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV64I-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX1-LABEL: cttz_v8i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-NEXT:    li a1, 1
-; LMULMAX1-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX1-NEXT:    vnot.v v8, v8
-; LMULMAX1-NEXT:    vand.vv v8, v8, v9
-; LMULMAX1-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-NEXT:    lui a1, 5
-; LMULMAX1-NEXT:    addi a1, a1, 1365
-; LMULMAX1-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-NEXT:    lui a1, 3
-; LMULMAX1-NEXT:    addi a1, a1, 819
-; LMULMAX1-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-NEXT:    lui a1, 1
-; LMULMAX1-NEXT:    addi a1, a1, -241
-; LMULMAX1-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-NEXT:    li a1, 257
-; LMULMAX1-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: cttz_v8i16:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV32F-NEXT:    vand.vv v9, v8, v9
-; LMULMAX2-RV32F-NEXT:    vfwcvt.f.xu.v v10, v9
-; LMULMAX2-RV32F-NEXT:    vnsrl.wi v9, v10, 23
-; LMULMAX2-RV32F-NEXT:    li a1, 127
-; LMULMAX2-RV32F-NEXT:    vsub.vx v9, v9, a1
-; LMULMAX2-RV32F-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV32F-NEXT:    li a1, 16
-; LMULMAX2-RV32F-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX2-RV32F-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: cttz_v8i16:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV64F-NEXT:    vand.vv v9, v8, v9
-; LMULMAX2-RV64F-NEXT:    vfwcvt.f.xu.v v10, v9
-; LMULMAX2-RV64F-NEXT:    vnsrl.wi v9, v10, 23
-; LMULMAX2-RV64F-NEXT:    li a1, 127
-; LMULMAX2-RV64F-NEXT:    vsub.vx v9, v9, a1
-; LMULMAX2-RV64F-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV64F-NEXT:    li a1, 16
-; LMULMAX2-RV64F-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX2-RV64F-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: cttz_v8i16:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV32D-NEXT:    vand.vv v9, v8, v9
-; LMULMAX2-RV32D-NEXT:    vfwcvt.f.xu.v v10, v9
-; LMULMAX2-RV32D-NEXT:    vnsrl.wi v9, v10, 23
-; LMULMAX2-RV32D-NEXT:    li a1, 127
-; LMULMAX2-RV32D-NEXT:    vsub.vx v9, v9, a1
-; LMULMAX2-RV32D-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV32D-NEXT:    li a1, 16
-; LMULMAX2-RV32D-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX2-RV32D-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: cttz_v8i16:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV64D-NEXT:    vand.vv v9, v8, v9
-; LMULMAX2-RV64D-NEXT:    vfwcvt.f.xu.v v10, v9
-; LMULMAX2-RV64D-NEXT:    vnsrl.wi v9, v10, 23
-; LMULMAX2-RV64D-NEXT:    li a1, 127
-; LMULMAX2-RV64D-NEXT:    vsub.vx v9, v9, a1
-; LMULMAX2-RV64D-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV64D-NEXT:    li a1, 16
-; LMULMAX2-RV64D-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX2-RV64D-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: cttz_v8i16:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX8-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX8-NEXT:    vand.vv v9, v8, v9
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v10, v9
-; LMULMAX8-NEXT:    vnsrl.wi v9, v10, 23
-; LMULMAX8-NEXT:    li a1, 127
-; LMULMAX8-NEXT:    vsub.vx v9, v9, a1
-; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-NEXT:    li a1, 16
-; LMULMAX8-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX8-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: cttz_v8i16:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RVI-NEXT:    vle16.v v8, (a0)
+; RVI-NEXT:    li a1, 1
+; RVI-NEXT:    vsub.vx v9, v8, a1
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vand.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 1
+; RVI-NEXT:    lui a1, 5
+; RVI-NEXT:    addi a1, a1, 1365
+; RVI-NEXT:    vand.vx v9, v9, a1
+; RVI-NEXT:    vsub.vv v8, v8, v9
+; RVI-NEXT:    lui a1, 3
+; RVI-NEXT:    addi a1, a1, 819
+; RVI-NEXT:    vand.vx v9, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v9, v8
+; RVI-NEXT:    vsrl.vi v9, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v9
+; RVI-NEXT:    lui a1, 1
+; RVI-NEXT:    addi a1, a1, -241
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    li a1, 257
+; RVI-NEXT:    vmul.vx v8, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 8
+; RVI-NEXT:    vse16.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: cttz_v8i16:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RVF-NEXT:    vle16.v v8, (a0)
+; RVF-NEXT:    vrsub.vi v9, v8, 0
+; RVF-NEXT:    vand.vv v9, v8, v9
+; RVF-NEXT:    vfwcvt.f.xu.v v10, v9
+; RVF-NEXT:    vnsrl.wi v9, v10, 23
+; RVF-NEXT:    li a1, 127
+; RVF-NEXT:    vsub.vx v9, v9, a1
+; RVF-NEXT:    vmseq.vi v0, v8, 0
+; RVF-NEXT:    li a1, 16
+; RVF-NEXT:    vmerge.vxm v8, v9, a1, v0
+; RVF-NEXT:    vse16.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: cttz_v8i16:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RVD-NEXT:    vle16.v v8, (a0)
+; RVD-NEXT:    vrsub.vi v9, v8, 0
+; RVD-NEXT:    vand.vv v9, v8, v9
+; RVD-NEXT:    vfwcvt.f.xu.v v10, v9
+; RVD-NEXT:    vnsrl.wi v9, v10, 23
+; RVD-NEXT:    li a1, 127
+; RVD-NEXT:    vsub.vx v9, v9, a1
+; RVD-NEXT:    vmseq.vi v0, v8, 0
+; RVD-NEXT:    li a1, 16
+; RVD-NEXT:    vmerge.vxm v8, v9, a1, v0
+; RVD-NEXT:    vse16.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: cttz_v8i16:
 ; ZVBB:       # %bb.0:
@@ -259,154 +164,71 @@ define void @cttz_v8i16(ptr %x, ptr %y) nounwind {
 declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)
 
 define void @cttz_v4i32(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: cttz_v4i32:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    li a1, 1
-; LMULMAX2-RV32I-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV32I-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: cttz_v4i32:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    li a1, 1
-; LMULMAX2-RV64I-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV64I-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: cttz_v4i32:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV32F-NEXT:    vand.vv v9, v8, v9
-; LMULMAX2-RV32F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32F-NEXT:    vfcvt.f.xu.v v9, v9
-; LMULMAX2-RV32F-NEXT:    fsrm a1
-; LMULMAX2-RV32F-NEXT:    vsrl.vi v9, v9, 23
-; LMULMAX2-RV32F-NEXT:    li a1, 127
-; LMULMAX2-RV32F-NEXT:    vsub.vx v9, v9, a1
-; LMULMAX2-RV32F-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV32F-NEXT:    li a1, 32
-; LMULMAX2-RV32F-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX2-RV32F-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: cttz_v4i32:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV64F-NEXT:    vand.vv v9, v8, v9
-; LMULMAX2-RV64F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64F-NEXT:    vfcvt.f.xu.v v9, v9
-; LMULMAX2-RV64F-NEXT:    fsrm a1
-; LMULMAX2-RV64F-NEXT:    vsrl.vi v9, v9, 23
-; LMULMAX2-RV64F-NEXT:    li a1, 127
-; LMULMAX2-RV64F-NEXT:    vsub.vx v9, v9, a1
-; LMULMAX2-RV64F-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV64F-NEXT:    li a1, 32
-; LMULMAX2-RV64F-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX2-RV64F-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: cttz_v4i32:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV32D-NEXT:    vand.vv v9, v8, v9
-; LMULMAX2-RV32D-NEXT:    vfwcvt.f.xu.v v10, v9
-; LMULMAX2-RV32D-NEXT:    li a1, 52
-; LMULMAX2-RV32D-NEXT:    vnsrl.wx v9, v10, a1
-; LMULMAX2-RV32D-NEXT:    li a1, 1023
-; LMULMAX2-RV32D-NEXT:    vsub.vx v9, v9, a1
-; LMULMAX2-RV32D-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV32D-NEXT:    li a1, 32
-; LMULMAX2-RV32D-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX2-RV32D-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: cttz_v4i32:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV64D-NEXT:    vand.vv v9, v8, v9
-; LMULMAX2-RV64D-NEXT:    vfwcvt.f.xu.v v10, v9
-; LMULMAX2-RV64D-NEXT:    li a1, 52
-; LMULMAX2-RV64D-NEXT:    vnsrl.wx v9, v10, a1
-; LMULMAX2-RV64D-NEXT:    li a1, 1023
-; LMULMAX2-RV64D-NEXT:    vsub.vx v9, v9, a1
-; LMULMAX2-RV64D-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV64D-NEXT:    li a1, 32
-; LMULMAX2-RV64D-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX2-RV64D-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: cttz_v4i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX8-NEXT:    vand.vv v9, v8, v9
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v10, v9
-; LMULMAX8-NEXT:    li a1, 52
-; LMULMAX8-NEXT:    vnsrl.wx v9, v10, a1
-; LMULMAX8-NEXT:    li a1, 1023
-; LMULMAX8-NEXT:    vsub.vx v9, v9, a1
-; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-NEXT:    li a1, 32
-; LMULMAX8-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX8-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: cttz_v4i32:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RVI-NEXT:    vle32.v v8, (a0)
+; RVI-NEXT:    li a1, 1
+; RVI-NEXT:    vsub.vx v9, v8, a1
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vand.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 1
+; RVI-NEXT:    lui a1, 349525
+; RVI-NEXT:    addi a1, a1, 1365
+; RVI-NEXT:    vand.vx v9, v9, a1
+; RVI-NEXT:    vsub.vv v8, v8, v9
+; RVI-NEXT:    lui a1, 209715
+; RVI-NEXT:    addi a1, a1, 819
+; RVI-NEXT:    vand.vx v9, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v9, v8
+; RVI-NEXT:    vsrl.vi v9, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v9
+; RVI-NEXT:    lui a1, 61681
+; RVI-NEXT:    addi a1, a1, -241
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    lui a1, 4112
+; RVI-NEXT:    addi a1, a1, 257
+; RVI-NEXT:    vmul.vx v8, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 24
+; RVI-NEXT:    vse32.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: cttz_v4i32:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RVF-NEXT:    vle32.v v8, (a0)
+; RVF-NEXT:    vrsub.vi v9, v8, 0
+; RVF-NEXT:    vand.vv v9, v8, v9
+; RVF-NEXT:    fsrmi a1, 1
+; RVF-NEXT:    vfcvt.f.xu.v v9, v9
+; RVF-NEXT:    fsrm a1
+; RVF-NEXT:    vsrl.vi v9, v9, 23
+; RVF-NEXT:    li a1, 127
+; RVF-NEXT:    vsub.vx v9, v9, a1
+; RVF-NEXT:    vmseq.vi v0, v8, 0
+; RVF-NEXT:    li a1, 32
+; RVF-NEXT:    vmerge.vxm v8, v9, a1, v0
+; RVF-NEXT:    vse32.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: cttz_v4i32:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RVD-NEXT:    vle32.v v8, (a0)
+; RVD-NEXT:    vrsub.vi v9, v8, 0
+; RVD-NEXT:    vand.vv v9, v8, v9
+; RVD-NEXT:    vfwcvt.f.xu.v v10, v9
+; RVD-NEXT:    li a1, 52
+; RVD-NEXT:    vnsrl.wx v9, v10, a1
+; RVD-NEXT:    li a1, 1023
+; RVD-NEXT:    vsub.vx v9, v9, a1
+; RVD-NEXT:    vmseq.vi v0, v8, 0
+; RVD-NEXT:    li a1, 32
+; RVD-NEXT:    vmerge.vxm v8, v9, a1, v0
+; RVD-NEXT:    vse32.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: cttz_v4i32:
 ; ZVBB:       # %bb.0:
@@ -424,187 +246,149 @@ define void @cttz_v4i32(ptr %x, ptr %y) nounwind {
 declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
 
 define void @cttz_v2i64(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: cttz_v2i64:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    li a1, 1
-; LMULMAX2-RV32I-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v9, v9, v10
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v10, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    li a1, 56
-; LMULMAX2-RV32I-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: cttz_v2i64:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    li a1, 1
-; LMULMAX2-RV64I-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 257
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    li a1, 56
-; LMULMAX2-RV64I-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: cttz_v2i64:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV32F-NEXT:    vand.vv v9, v8, v9
-; LMULMAX2-RV32F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX2-RV32F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32F-NEXT:    vfncvt.f.xu.w v10, v9
-; LMULMAX2-RV32F-NEXT:    fsrm a1
-; LMULMAX2-RV32F-NEXT:    vsrl.vi v9, v10, 23
-; LMULMAX2-RV32F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; LMULMAX2-RV32F-NEXT:    vzext.vf2 v10, v9
-; LMULMAX2-RV32F-NEXT:    li a1, 127
-; LMULMAX2-RV32F-NEXT:    vsub.vx v9, v10, a1
-; LMULMAX2-RV32F-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV32F-NEXT:    li a1, 64
-; LMULMAX2-RV32F-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX2-RV32F-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: cttz_v2i64:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV64F-NEXT:    vand.vv v9, v8, v9
-; LMULMAX2-RV64F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX2-RV64F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64F-NEXT:    vfncvt.f.xu.w v10, v9
-; LMULMAX2-RV64F-NEXT:    fsrm a1
-; LMULMAX2-RV64F-NEXT:    vsrl.vi v9, v10, 23
-; LMULMAX2-RV64F-NEXT:    li a1, 127
-; LMULMAX2-RV64F-NEXT:    vwsubu.vx v10, v9, a1
-; LMULMAX2-RV64F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; LMULMAX2-RV64F-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV64F-NEXT:    li a1, 64
-; LMULMAX2-RV64F-NEXT:    vmerge.vxm v8, v10, a1, v0
-; LMULMAX2-RV64F-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: cttz_v2i64:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV32D-NEXT:    vand.vv v9, v8, v9
-; LMULMAX2-RV32D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32D-NEXT:    vfcvt.f.xu.v v9, v9
-; LMULMAX2-RV32D-NEXT:    fsrm a1
-; LMULMAX2-RV32D-NEXT:    li a1, 52
-; LMULMAX2-RV32D-NEXT:    vsrl.vx v9, v9, a1
-; LMULMAX2-RV32D-NEXT:    li a1, 1023
-; LMULMAX2-RV32D-NEXT:    vsub.vx v9, v9, a1
-; LMULMAX2-RV32D-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV32D-NEXT:    li a1, 64
-; LMULMAX2-RV32D-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX2-RV32D-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: cttz_v2i64:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV64D-NEXT:    vand.vv v9, v8, v9
-; LMULMAX2-RV64D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64D-NEXT:    vfcvt.f.xu.v v9, v9
-; LMULMAX2-RV64D-NEXT:    fsrm a1
-; LMULMAX2-RV64D-NEXT:    li a1, 52
-; LMULMAX2-RV64D-NEXT:    vsrl.vx v9, v9, a1
-; LMULMAX2-RV64D-NEXT:    li a1, 1023
-; LMULMAX2-RV64D-NEXT:    vsub.vx v9, v9, a1
-; LMULMAX2-RV64D-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV64D-NEXT:    li a1, 64
-; LMULMAX2-RV64D-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX2-RV64D-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: cttz_v2i64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX8-NEXT:    vle64.v v8, (a0)
-; LMULMAX8-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX8-NEXT:    vand.vv v9, v8, v9
-; LMULMAX8-NEXT:    fsrmi a1, 1
-; LMULMAX8-NEXT:    vfcvt.f.xu.v v9, v9
-; LMULMAX8-NEXT:    fsrm a1
-; LMULMAX8-NEXT:    li a1, 52
-; LMULMAX8-NEXT:    vsrl.vx v9, v9, a1
-; LMULMAX8-NEXT:    li a1, 1023
-; LMULMAX8-NEXT:    vsub.vx v9, v9, a1
-; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-NEXT:    li a1, 64
-; LMULMAX8-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX8-NEXT:    vse64.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RV32I-LABEL: cttz_v2i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vle64.v v8, (a0)
+; RV32I-NEXT:    li a1, 1
+; RV32I-NEXT:    vsub.vx v9, v8, a1
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vand.vv v8, v8, v9
+; RV32I-NEXT:    vsrl.vi v9, v8, 1
+; RV32I-NEXT:    lui a1, 349525
+; RV32I-NEXT:    addi a1, a1, 1365
+; RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32I-NEXT:    vmv.v.x v10, a1
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vand.vv v9, v9, v10
+; RV32I-NEXT:    vsub.vv v8, v8, v9
+; RV32I-NEXT:    lui a1, 209715
+; RV32I-NEXT:    addi a1, a1, 819
+; RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32I-NEXT:    vmv.v.x v9, a1
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vand.vv v10, v8, v9
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v9
+; RV32I-NEXT:    vadd.vv v8, v10, v8
+; RV32I-NEXT:    vsrl.vi v9, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v9
+; RV32I-NEXT:    lui a1, 61681
+; RV32I-NEXT:    addi a1, a1, -241
+; RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32I-NEXT:    vmv.v.x v9, a1
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vand.vv v8, v8, v9
+; RV32I-NEXT:    lui a1, 4112
+; RV32I-NEXT:    addi a1, a1, 257
+; RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32I-NEXT:    vmv.v.x v9, a1
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vmul.vv v8, v8, v9
+; RV32I-NEXT:    li a1, 56
+; RV32I-NEXT:    vsrl.vx v8, v8, a1
+; RV32I-NEXT:    vse64.v v8, (a0)
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: cttz_v2i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64I-NEXT:    vle64.v v8, (a0)
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    vsub.vx v9, v8, a1
+; RV64I-NEXT:    vnot.v v8, v8
+; RV64I-NEXT:    vand.vv v8, v8, v9
+; RV64I-NEXT:    vsrl.vi v9, v8, 1
+; RV64I-NEXT:    lui a1, 349525
+; RV64I-NEXT:    addiw a1, a1, 1365
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v9, v9, a1
+; RV64I-NEXT:    vsub.vv v8, v8, v9
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v9, v8, a1
+; RV64I-NEXT:    vsrl.vi v8, v8, 2
+; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    vadd.vv v8, v9, v8
+; RV64I-NEXT:    vsrl.vi v9, v8, 4
+; RV64I-NEXT:    vadd.vv v8, v8, v9
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vmul.vx v8, v8, a1
+; RV64I-NEXT:    li a1, 56
+; RV64I-NEXT:    vsrl.vx v8, v8, a1
+; RV64I-NEXT:    vse64.v v8, (a0)
+; RV64I-NEXT:    ret
+;
+; RV32F-LABEL: cttz_v2i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32F-NEXT:    vle64.v v8, (a0)
+; RV32F-NEXT:    vrsub.vi v9, v8, 0
+; RV32F-NEXT:    vand.vv v9, v8, v9
+; RV32F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV32F-NEXT:    fsrmi a1, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v10, v9
+; RV32F-NEXT:    fsrm a1
+; RV32F-NEXT:    vsrl.vi v9, v10, 23
+; RV32F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; RV32F-NEXT:    vzext.vf2 v10, v9
+; RV32F-NEXT:    li a1, 127
+; RV32F-NEXT:    vsub.vx v9, v10, a1
+; RV32F-NEXT:    vmseq.vi v0, v8, 0
+; RV32F-NEXT:    li a1, 64
+; RV32F-NEXT:    vmerge.vxm v8, v9, a1, v0
+; RV32F-NEXT:    vse64.v v8, (a0)
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: cttz_v2i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64F-NEXT:    vle64.v v8, (a0)
+; RV64F-NEXT:    vrsub.vi v9, v8, 0
+; RV64F-NEXT:    vand.vv v9, v8, v9
+; RV64F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV64F-NEXT:    fsrmi a1, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v10, v9
+; RV64F-NEXT:    fsrm a1
+; RV64F-NEXT:    vsrl.vi v9, v10, 23
+; RV64F-NEXT:    li a1, 127
+; RV64F-NEXT:    vwsubu.vx v10, v9, a1
+; RV64F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; RV64F-NEXT:    vmseq.vi v0, v8, 0
+; RV64F-NEXT:    li a1, 64
+; RV64F-NEXT:    vmerge.vxm v8, v10, a1, v0
+; RV64F-NEXT:    vse64.v v8, (a0)
+; RV64F-NEXT:    ret
+;
+; RVD-LABEL: cttz_v2i64:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RVD-NEXT:    vle64.v v8, (a0)
+; RVD-NEXT:    vrsub.vi v9, v8, 0
+; RVD-NEXT:    vand.vv v9, v8, v9
+; RVD-NEXT:    fsrmi a1, 1
+; RVD-NEXT:    vfcvt.f.xu.v v9, v9
+; RVD-NEXT:    fsrm a1
+; RVD-NEXT:    li a1, 52
+; RVD-NEXT:    vsrl.vx v9, v9, a1
+; RVD-NEXT:    li a1, 1023
+; RVD-NEXT:    vsub.vx v9, v9, a1
+; RVD-NEXT:    vmseq.vi v0, v8, 0
+; RVD-NEXT:    li a1, 64
+; RVD-NEXT:    vmerge.vxm v8, v9, a1, v0
+; RVD-NEXT:    vse64.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: cttz_v2i64:
 ; ZVBB:       # %bb.0:
@@ -622,88 +406,69 @@ define void @cttz_v2i64(ptr %x, ptr %y) nounwind {
 declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
 
 define void @cttz_v32i8(ptr %x, ptr %y) nounwind {
-; LMULMAX2-LABEL: cttz_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a1, 32
-; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    li a1, 1
-; LMULMAX2-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-NEXT:    vnot.v v8, v8
-; LMULMAX2-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-NEXT:    li a1, 85
-; LMULMAX2-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    li a1, 51
-; LMULMAX2-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    vand.vi v8, v8, 15
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: cttz_v32i8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle8.v v8, (a1)
-; LMULMAX1-NEXT:    vle8.v v9, (a0)
-; LMULMAX1-NEXT:    li a2, 1
-; LMULMAX1-NEXT:    vsub.vx v10, v8, a2
-; LMULMAX1-NEXT:    vnot.v v8, v8
-; LMULMAX1-NEXT:    vand.vv v8, v8, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-NEXT:    li a3, 85
-; LMULMAX1-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-NEXT:    li a4, 51
-; LMULMAX1-NEXT:    vand.vx v10, v8, a4
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-NEXT:    vand.vi v8, v8, 15
-; LMULMAX1-NEXT:    vsub.vx v10, v9, a2
-; LMULMAX1-NEXT:    vnot.v v9, v9
-; LMULMAX1-NEXT:    vand.vv v9, v9, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vx v10, v9, a4
-; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vi v9, v9, 15
-; LMULMAX1-NEXT:    vse8.v v9, (a0)
-; LMULMAX1-NEXT:    vse8.v v8, (a1)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: cttz_v32i8:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a1, 32
-; LMULMAX8-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX8-NEXT:    vle8.v v8, (a0)
-; LMULMAX8-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX8-NEXT:    vand.vv v10, v8, v10
-; LMULMAX8-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; LMULMAX8-NEXT:    vzext.vf2 v12, v10
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v16, v12
-; LMULMAX8-NEXT:    vnsrl.wi v12, v16, 23
-; LMULMAX8-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; LMULMAX8-NEXT:    vnsrl.wi v10, v12, 0
-; LMULMAX8-NEXT:    li a1, 127
-; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-NEXT:    vsub.vx v8, v10, a1
-; LMULMAX8-NEXT:    vmerge.vim v8, v8, 8, v0
-; LMULMAX8-NEXT:    vse8.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: cttz_v32i8:
+; RVI:       # %bb.0:
+; RVI-NEXT:    li a1, 32
+; RVI-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RVI-NEXT:    vle8.v v8, (a0)
+; RVI-NEXT:    li a1, 1
+; RVI-NEXT:    vsub.vx v10, v8, a1
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vand.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 1
+; RVI-NEXT:    li a1, 85
+; RVI-NEXT:    vand.vx v10, v10, a1
+; RVI-NEXT:    vsub.vv v8, v8, v10
+; RVI-NEXT:    li a1, 51
+; RVI-NEXT:    vand.vx v10, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v10, v8
+; RVI-NEXT:    vsrl.vi v10, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v10
+; RVI-NEXT:    vand.vi v8, v8, 15
+; RVI-NEXT:    vse8.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: cttz_v32i8:
+; RVF:       # %bb.0:
+; RVF-NEXT:    li a1, 32
+; RVF-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RVF-NEXT:    vle8.v v8, (a0)
+; RVF-NEXT:    vrsub.vi v10, v8, 0
+; RVF-NEXT:    vand.vv v10, v8, v10
+; RVF-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; RVF-NEXT:    vzext.vf2 v12, v10
+; RVF-NEXT:    vfwcvt.f.xu.v v16, v12
+; RVF-NEXT:    vnsrl.wi v12, v16, 23
+; RVF-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; RVF-NEXT:    vnsrl.wi v10, v12, 0
+; RVF-NEXT:    li a1, 127
+; RVF-NEXT:    vmseq.vi v0, v8, 0
+; RVF-NEXT:    vsub.vx v8, v10, a1
+; RVF-NEXT:    vmerge.vim v8, v8, 8, v0
+; RVF-NEXT:    vse8.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: cttz_v32i8:
+; RVD:       # %bb.0:
+; RVD-NEXT:    li a1, 32
+; RVD-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RVD-NEXT:    vle8.v v8, (a0)
+; RVD-NEXT:    vrsub.vi v10, v8, 0
+; RVD-NEXT:    vand.vv v10, v8, v10
+; RVD-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; RVD-NEXT:    vzext.vf2 v12, v10
+; RVD-NEXT:    vfwcvt.f.xu.v v16, v12
+; RVD-NEXT:    vnsrl.wi v12, v16, 23
+; RVD-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; RVD-NEXT:    vnsrl.wi v10, v12, 0
+; RVD-NEXT:    li a1, 127
+; RVD-NEXT:    vmseq.vi v0, v8, 0
+; RVD-NEXT:    vsub.vx v8, v10, a1
+; RVD-NEXT:    vmerge.vim v8, v8, 8, v0
+; RVD-NEXT:    vse8.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: cttz_v32i8:
 ; ZVBB:       # %bb.0:
@@ -722,99 +487,67 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind {
 declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)
 
 define void @cttz_v16i16(ptr %x, ptr %y) nounwind {
-; LMULMAX2-LABEL: cttz_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    li a1, 1
-; LMULMAX2-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-NEXT:    vnot.v v8, v8
-; LMULMAX2-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-NEXT:    lui a1, 5
-; LMULMAX2-NEXT:    addi a1, a1, 1365
-; LMULMAX2-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    lui a1, 3
-; LMULMAX2-NEXT:    addi a1, a1, 819
-; LMULMAX2-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    lui a1, 1
-; LMULMAX2-NEXT:    addi a1, a1, -241
-; LMULMAX2-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-NEXT:    li a1, 257
-; LMULMAX2-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: cttz_v16i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-NEXT:    li a2, 1
-; LMULMAX1-NEXT:    vsub.vx v10, v8, a2
-; LMULMAX1-NEXT:    vnot.v v8, v8
-; LMULMAX1-NEXT:    vand.vv v8, v8, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-NEXT:    lui a3, 5
-; LMULMAX1-NEXT:    addi a3, a3, 1365
-; LMULMAX1-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-NEXT:    lui a4, 3
-; LMULMAX1-NEXT:    addi a4, a4, 819
-; LMULMAX1-NEXT:    vand.vx v10, v8, a4
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-NEXT:    lui a5, 1
-; LMULMAX1-NEXT:    addi a5, a5, -241
-; LMULMAX1-NEXT:    vand.vx v8, v8, a5
-; LMULMAX1-NEXT:    li a6, 257
-; LMULMAX1-NEXT:    vmul.vx v8, v8, a6
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-NEXT:    vsub.vx v10, v9, a2
-; LMULMAX1-NEXT:    vnot.v v9, v9
-; LMULMAX1-NEXT:    vand.vv v9, v9, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vx v10, v9, a4
-; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vx v9, v9, a5
-; LMULMAX1-NEXT:    vmul.vx v9, v9, a6
-; LMULMAX1-NEXT:    vsrl.vi v9, v9, 8
-; LMULMAX1-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: cttz_v16i16:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX8-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX8-NEXT:    vand.vv v10, v8, v10
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v12, v10
-; LMULMAX8-NEXT:    vnsrl.wi v10, v12, 23
-; LMULMAX8-NEXT:    li a1, 127
-; LMULMAX8-NEXT:    vsub.vx v10, v10, a1
-; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-NEXT:    li a1, 16
-; LMULMAX8-NEXT:    vmerge.vxm v8, v10, a1, v0
-; LMULMAX8-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: cttz_v16i16:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RVI-NEXT:    vle16.v v8, (a0)
+; RVI-NEXT:    li a1, 1
+; RVI-NEXT:    vsub.vx v10, v8, a1
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vand.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 1
+; RVI-NEXT:    lui a1, 5
+; RVI-NEXT:    addi a1, a1, 1365
+; RVI-NEXT:    vand.vx v10, v10, a1
+; RVI-NEXT:    vsub.vv v8, v8, v10
+; RVI-NEXT:    lui a1, 3
+; RVI-NEXT:    addi a1, a1, 819
+; RVI-NEXT:    vand.vx v10, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v10, v8
+; RVI-NEXT:    vsrl.vi v10, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v10
+; RVI-NEXT:    lui a1, 1
+; RVI-NEXT:    addi a1, a1, -241
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    li a1, 257
+; RVI-NEXT:    vmul.vx v8, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 8
+; RVI-NEXT:    vse16.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: cttz_v16i16:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RVF-NEXT:    vle16.v v8, (a0)
+; RVF-NEXT:    vrsub.vi v10, v8, 0
+; RVF-NEXT:    vand.vv v10, v8, v10
+; RVF-NEXT:    vfwcvt.f.xu.v v12, v10
+; RVF-NEXT:    vnsrl.wi v10, v12, 23
+; RVF-NEXT:    li a1, 127
+; RVF-NEXT:    vsub.vx v10, v10, a1
+; RVF-NEXT:    vmseq.vi v0, v8, 0
+; RVF-NEXT:    li a1, 16
+; RVF-NEXT:    vmerge.vxm v8, v10, a1, v0
+; RVF-NEXT:    vse16.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: cttz_v16i16:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RVD-NEXT:    vle16.v v8, (a0)
+; RVD-NEXT:    vrsub.vi v10, v8, 0
+; RVD-NEXT:    vand.vv v10, v8, v10
+; RVD-NEXT:    vfwcvt.f.xu.v v12, v10
+; RVD-NEXT:    vnsrl.wi v10, v12, 23
+; RVD-NEXT:    li a1, 127
+; RVD-NEXT:    vsub.vx v10, v10, a1
+; RVD-NEXT:    vmseq.vi v0, v8, 0
+; RVD-NEXT:    li a1, 16
+; RVD-NEXT:    vmerge.vxm v8, v10, a1, v0
+; RVD-NEXT:    vse16.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: cttz_v16i16:
 ; ZVBB:       # %bb.0:
@@ -832,156 +565,71 @@ define void @cttz_v16i16(ptr %x, ptr %y) nounwind {
 declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)
 
 define void @cttz_v8i32(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: cttz_v8i32:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    li a1, 1
-; LMULMAX2-RV32I-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV32I-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: cttz_v8i32:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    li a1, 1
-; LMULMAX2-RV64I-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV64I-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: cttz_v8i32:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-RV32F-NEXT:    vand.vv v10, v8, v10
-; LMULMAX2-RV32F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32F-NEXT:    vfcvt.f.xu.v v10, v10
-; LMULMAX2-RV32F-NEXT:    fsrm a1
-; LMULMAX2-RV32F-NEXT:    vsrl.vi v10, v10, 23
-; LMULMAX2-RV32F-NEXT:    li a1, 127
-; LMULMAX2-RV32F-NEXT:    vsub.vx v10, v10, a1
-; LMULMAX2-RV32F-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV32F-NEXT:    li a1, 32
-; LMULMAX2-RV32F-NEXT:    vmerge.vxm v8, v10, a1, v0
-; LMULMAX2-RV32F-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: cttz_v8i32:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-RV64F-NEXT:    vand.vv v10, v8, v10
-; LMULMAX2-RV64F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64F-NEXT:    vfcvt.f.xu.v v10, v10
-; LMULMAX2-RV64F-NEXT:    fsrm a1
-; LMULMAX2-RV64F-NEXT:    vsrl.vi v10, v10, 23
-; LMULMAX2-RV64F-NEXT:    li a1, 127
-; LMULMAX2-RV64F-NEXT:    vsub.vx v10, v10, a1
-; LMULMAX2-RV64F-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV64F-NEXT:    li a1, 32
-; LMULMAX2-RV64F-NEXT:    vmerge.vxm v8, v10, a1, v0
-; LMULMAX2-RV64F-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: cttz_v8i32:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-RV32D-NEXT:    vand.vv v10, v8, v10
-; LMULMAX2-RV32D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32D-NEXT:    vfcvt.f.xu.v v10, v10
-; LMULMAX2-RV32D-NEXT:    fsrm a1
-; LMULMAX2-RV32D-NEXT:    vsrl.vi v10, v10, 23
-; LMULMAX2-RV32D-NEXT:    li a1, 127
-; LMULMAX2-RV32D-NEXT:    vsub.vx v10, v10, a1
-; LMULMAX2-RV32D-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV32D-NEXT:    li a1, 32
-; LMULMAX2-RV32D-NEXT:    vmerge.vxm v8, v10, a1, v0
-; LMULMAX2-RV32D-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: cttz_v8i32:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-RV64D-NEXT:    vand.vv v10, v8, v10
-; LMULMAX2-RV64D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64D-NEXT:    vfcvt.f.xu.v v10, v10
-; LMULMAX2-RV64D-NEXT:    fsrm a1
-; LMULMAX2-RV64D-NEXT:    vsrl.vi v10, v10, 23
-; LMULMAX2-RV64D-NEXT:    li a1, 127
-; LMULMAX2-RV64D-NEXT:    vsub.vx v10, v10, a1
-; LMULMAX2-RV64D-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV64D-NEXT:    li a1, 32
-; LMULMAX2-RV64D-NEXT:    vmerge.vxm v8, v10, a1, v0
-; LMULMAX2-RV64D-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: cttz_v8i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX8-NEXT:    vand.vv v10, v8, v10
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v12, v10
-; LMULMAX8-NEXT:    li a1, 52
-; LMULMAX8-NEXT:    vnsrl.wx v10, v12, a1
-; LMULMAX8-NEXT:    li a1, 1023
-; LMULMAX8-NEXT:    vsub.vx v10, v10, a1
-; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-NEXT:    li a1, 32
-; LMULMAX8-NEXT:    vmerge.vxm v8, v10, a1, v0
-; LMULMAX8-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: cttz_v8i32:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RVI-NEXT:    vle32.v v8, (a0)
+; RVI-NEXT:    li a1, 1
+; RVI-NEXT:    vsub.vx v10, v8, a1
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vand.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 1
+; RVI-NEXT:    lui a1, 349525
+; RVI-NEXT:    addi a1, a1, 1365
+; RVI-NEXT:    vand.vx v10, v10, a1
+; RVI-NEXT:    vsub.vv v8, v8, v10
+; RVI-NEXT:    lui a1, 209715
+; RVI-NEXT:    addi a1, a1, 819
+; RVI-NEXT:    vand.vx v10, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v10, v8
+; RVI-NEXT:    vsrl.vi v10, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v10
+; RVI-NEXT:    lui a1, 61681
+; RVI-NEXT:    addi a1, a1, -241
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    lui a1, 4112
+; RVI-NEXT:    addi a1, a1, 257
+; RVI-NEXT:    vmul.vx v8, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 24
+; RVI-NEXT:    vse32.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: cttz_v8i32:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RVF-NEXT:    vle32.v v8, (a0)
+; RVF-NEXT:    vrsub.vi v10, v8, 0
+; RVF-NEXT:    vand.vv v10, v8, v10
+; RVF-NEXT:    fsrmi a1, 1
+; RVF-NEXT:    vfcvt.f.xu.v v10, v10
+; RVF-NEXT:    fsrm a1
+; RVF-NEXT:    vsrl.vi v10, v10, 23
+; RVF-NEXT:    li a1, 127
+; RVF-NEXT:    vsub.vx v10, v10, a1
+; RVF-NEXT:    vmseq.vi v0, v8, 0
+; RVF-NEXT:    li a1, 32
+; RVF-NEXT:    vmerge.vxm v8, v10, a1, v0
+; RVF-NEXT:    vse32.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: cttz_v8i32:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RVD-NEXT:    vle32.v v8, (a0)
+; RVD-NEXT:    vrsub.vi v10, v8, 0
+; RVD-NEXT:    vand.vv v10, v8, v10
+; RVD-NEXT:    vfwcvt.f.xu.v v12, v10
+; RVD-NEXT:    li a1, 52
+; RVD-NEXT:    vnsrl.wx v10, v12, a1
+; RVD-NEXT:    li a1, 1023
+; RVD-NEXT:    vsub.vx v10, v10, a1
+; RVD-NEXT:    vmseq.vi v0, v8, 0
+; RVD-NEXT:    li a1, 32
+; RVD-NEXT:    vmerge.vxm v8, v10, a1, v0
+; RVD-NEXT:    vse32.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: cttz_v8i32:
 ; ZVBB:       # %bb.0:
@@ -999,187 +647,149 @@ define void @cttz_v8i32(ptr %x, ptr %y) nounwind {
 declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
 
 define void @cttz_v4i64(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: cttz_v4i64:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    li a1, 1
-; LMULMAX2-RV32I-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v12, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v10, v10, v12
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v12, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v12, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    li a1, 56
-; LMULMAX2-RV32I-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: cttz_v4i64:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    li a1, 1
-; LMULMAX2-RV64I-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 257
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    li a1, 56
-; LMULMAX2-RV64I-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: cttz_v4i64:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-RV32F-NEXT:    vand.vv v10, v8, v10
-; LMULMAX2-RV32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; LMULMAX2-RV32F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32F-NEXT:    vfncvt.f.xu.w v12, v10
-; LMULMAX2-RV32F-NEXT:    fsrm a1
-; LMULMAX2-RV32F-NEXT:    vsrl.vi v10, v12, 23
-; LMULMAX2-RV32F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; LMULMAX2-RV32F-NEXT:    vzext.vf2 v12, v10
-; LMULMAX2-RV32F-NEXT:    li a1, 127
-; LMULMAX2-RV32F-NEXT:    vsub.vx v10, v12, a1
-; LMULMAX2-RV32F-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV32F-NEXT:    li a1, 64
-; LMULMAX2-RV32F-NEXT:    vmerge.vxm v8, v10, a1, v0
-; LMULMAX2-RV32F-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: cttz_v4i64:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-RV64F-NEXT:    vand.vv v10, v8, v10
-; LMULMAX2-RV64F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; LMULMAX2-RV64F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64F-NEXT:    vfncvt.f.xu.w v12, v10
-; LMULMAX2-RV64F-NEXT:    fsrm a1
-; LMULMAX2-RV64F-NEXT:    vsrl.vi v10, v12, 23
-; LMULMAX2-RV64F-NEXT:    li a1, 127
-; LMULMAX2-RV64F-NEXT:    vwsubu.vx v12, v10, a1
-; LMULMAX2-RV64F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; LMULMAX2-RV64F-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV64F-NEXT:    li a1, 64
-; LMULMAX2-RV64F-NEXT:    vmerge.vxm v8, v12, a1, v0
-; LMULMAX2-RV64F-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: cttz_v4i64:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-RV32D-NEXT:    vand.vv v10, v8, v10
-; LMULMAX2-RV32D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32D-NEXT:    vfcvt.f.xu.v v10, v10
-; LMULMAX2-RV32D-NEXT:    fsrm a1
-; LMULMAX2-RV32D-NEXT:    li a1, 52
-; LMULMAX2-RV32D-NEXT:    vsrl.vx v10, v10, a1
-; LMULMAX2-RV32D-NEXT:    li a1, 1023
-; LMULMAX2-RV32D-NEXT:    vsub.vx v10, v10, a1
-; LMULMAX2-RV32D-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV32D-NEXT:    li a1, 64
-; LMULMAX2-RV32D-NEXT:    vmerge.vxm v8, v10, a1, v0
-; LMULMAX2-RV32D-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: cttz_v4i64:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-RV64D-NEXT:    vand.vv v10, v8, v10
-; LMULMAX2-RV64D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64D-NEXT:    vfcvt.f.xu.v v10, v10
-; LMULMAX2-RV64D-NEXT:    fsrm a1
-; LMULMAX2-RV64D-NEXT:    li a1, 52
-; LMULMAX2-RV64D-NEXT:    vsrl.vx v10, v10, a1
-; LMULMAX2-RV64D-NEXT:    li a1, 1023
-; LMULMAX2-RV64D-NEXT:    vsub.vx v10, v10, a1
-; LMULMAX2-RV64D-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX2-RV64D-NEXT:    li a1, 64
-; LMULMAX2-RV64D-NEXT:    vmerge.vxm v8, v10, a1, v0
-; LMULMAX2-RV64D-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: cttz_v4i64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-NEXT:    vle64.v v8, (a0)
-; LMULMAX8-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX8-NEXT:    vand.vv v10, v8, v10
-; LMULMAX8-NEXT:    fsrmi a1, 1
-; LMULMAX8-NEXT:    vfcvt.f.xu.v v10, v10
-; LMULMAX8-NEXT:    fsrm a1
-; LMULMAX8-NEXT:    li a1, 52
-; LMULMAX8-NEXT:    vsrl.vx v10, v10, a1
-; LMULMAX8-NEXT:    li a1, 1023
-; LMULMAX8-NEXT:    vsub.vx v10, v10, a1
-; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-NEXT:    li a1, 64
-; LMULMAX8-NEXT:    vmerge.vxm v8, v10, a1, v0
-; LMULMAX8-NEXT:    vse64.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RV32I-LABEL: cttz_v4i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vle64.v v8, (a0)
+; RV32I-NEXT:    li a1, 1
+; RV32I-NEXT:    vsub.vx v10, v8, a1
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vand.vv v8, v8, v10
+; RV32I-NEXT:    vsrl.vi v10, v8, 1
+; RV32I-NEXT:    lui a1, 349525
+; RV32I-NEXT:    addi a1, a1, 1365
+; RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32I-NEXT:    vmv.v.x v12, a1
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vand.vv v10, v10, v12
+; RV32I-NEXT:    vsub.vv v8, v8, v10
+; RV32I-NEXT:    lui a1, 209715
+; RV32I-NEXT:    addi a1, a1, 819
+; RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32I-NEXT:    vmv.v.x v10, a1
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vand.vv v12, v8, v10
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v10
+; RV32I-NEXT:    vadd.vv v8, v12, v8
+; RV32I-NEXT:    vsrl.vi v10, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v10
+; RV32I-NEXT:    lui a1, 61681
+; RV32I-NEXT:    addi a1, a1, -241
+; RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32I-NEXT:    vmv.v.x v10, a1
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vand.vv v8, v8, v10
+; RV32I-NEXT:    lui a1, 4112
+; RV32I-NEXT:    addi a1, a1, 257
+; RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32I-NEXT:    vmv.v.x v10, a1
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vmul.vv v8, v8, v10
+; RV32I-NEXT:    li a1, 56
+; RV32I-NEXT:    vsrl.vx v8, v8, a1
+; RV32I-NEXT:    vse64.v v8, (a0)
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: cttz_v4i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64I-NEXT:    vle64.v v8, (a0)
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    vsub.vx v10, v8, a1
+; RV64I-NEXT:    vnot.v v8, v8
+; RV64I-NEXT:    vand.vv v8, v8, v10
+; RV64I-NEXT:    vsrl.vi v10, v8, 1
+; RV64I-NEXT:    lui a1, 349525
+; RV64I-NEXT:    addiw a1, a1, 1365
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v10, v10, a1
+; RV64I-NEXT:    vsub.vv v8, v8, v10
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v10, v8, a1
+; RV64I-NEXT:    vsrl.vi v8, v8, 2
+; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    vadd.vv v8, v10, v8
+; RV64I-NEXT:    vsrl.vi v10, v8, 4
+; RV64I-NEXT:    vadd.vv v8, v8, v10
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vmul.vx v8, v8, a1
+; RV64I-NEXT:    li a1, 56
+; RV64I-NEXT:    vsrl.vx v8, v8, a1
+; RV64I-NEXT:    vse64.v v8, (a0)
+; RV64I-NEXT:    ret
+;
+; RV32F-LABEL: cttz_v4i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32F-NEXT:    vle64.v v8, (a0)
+; RV32F-NEXT:    vrsub.vi v10, v8, 0
+; RV32F-NEXT:    vand.vv v10, v8, v10
+; RV32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; RV32F-NEXT:    fsrmi a1, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v12, v10
+; RV32F-NEXT:    fsrm a1
+; RV32F-NEXT:    vsrl.vi v10, v12, 23
+; RV32F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; RV32F-NEXT:    vzext.vf2 v12, v10
+; RV32F-NEXT:    li a1, 127
+; RV32F-NEXT:    vsub.vx v10, v12, a1
+; RV32F-NEXT:    vmseq.vi v0, v8, 0
+; RV32F-NEXT:    li a1, 64
+; RV32F-NEXT:    vmerge.vxm v8, v10, a1, v0
+; RV32F-NEXT:    vse64.v v8, (a0)
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: cttz_v4i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64F-NEXT:    vle64.v v8, (a0)
+; RV64F-NEXT:    vrsub.vi v10, v8, 0
+; RV64F-NEXT:    vand.vv v10, v8, v10
+; RV64F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; RV64F-NEXT:    fsrmi a1, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v12, v10
+; RV64F-NEXT:    fsrm a1
+; RV64F-NEXT:    vsrl.vi v10, v12, 23
+; RV64F-NEXT:    li a1, 127
+; RV64F-NEXT:    vwsubu.vx v12, v10, a1
+; RV64F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; RV64F-NEXT:    vmseq.vi v0, v8, 0
+; RV64F-NEXT:    li a1, 64
+; RV64F-NEXT:    vmerge.vxm v8, v12, a1, v0
+; RV64F-NEXT:    vse64.v v8, (a0)
+; RV64F-NEXT:    ret
+;
+; RVD-LABEL: cttz_v4i64:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RVD-NEXT:    vle64.v v8, (a0)
+; RVD-NEXT:    vrsub.vi v10, v8, 0
+; RVD-NEXT:    vand.vv v10, v8, v10
+; RVD-NEXT:    fsrmi a1, 1
+; RVD-NEXT:    vfcvt.f.xu.v v10, v10
+; RVD-NEXT:    fsrm a1
+; RVD-NEXT:    li a1, 52
+; RVD-NEXT:    vsrl.vx v10, v10, a1
+; RVD-NEXT:    li a1, 1023
+; RVD-NEXT:    vsub.vx v10, v10, a1
+; RVD-NEXT:    vmseq.vi v0, v8, 0
+; RVD-NEXT:    li a1, 64
+; RVD-NEXT:    vmerge.vxm v8, v10, a1, v0
+; RVD-NEXT:    vse64.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: cttz_v4i64:
 ; ZVBB:       # %bb.0:
@@ -1197,45 +807,62 @@ define void @cttz_v4i64(ptr %x, ptr %y) nounwind {
 declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1)
 
 define void @cttz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
-; CHECK-LABEL: cttz_zero_undef_v16i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    li a1, 1
-; CHECK-NEXT:    vsub.vx v9, v8, a1
-; CHECK-NEXT:    vnot.v v8, v8
-; CHECK-NEXT:    vand.vv v8, v8, v9
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    li a1, 85
-; CHECK-NEXT:    vand.vx v9, v9, a1
-; CHECK-NEXT:    vsub.vv v8, v8, v9
-; CHECK-NEXT:    li a1, 51
-; CHECK-NEXT:    vand.vx v9, v8, a1
-; CHECK-NEXT:    vsrl.vi v8, v8, 2
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vadd.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 4
-; CHECK-NEXT:    vadd.vv v8, v8, v9
-; CHECK-NEXT:    vand.vi v8, v8, 15
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    ret
-;
-; LMULMAX8-LABEL: cttz_zero_undef_v16i8:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX8-NEXT:    vle8.v v8, (a0)
-; LMULMAX8-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX8-NEXT:    vand.vv v8, v8, v9
-; LMULMAX8-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; LMULMAX8-NEXT:    vzext.vf2 v10, v8
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v12, v10
-; LMULMAX8-NEXT:    vnsrl.wi v8, v12, 23
-; LMULMAX8-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; LMULMAX8-NEXT:    vnsrl.wi v10, v8, 0
-; LMULMAX8-NEXT:    li a1, 127
-; LMULMAX8-NEXT:    vsub.vx v8, v10, a1
-; LMULMAX8-NEXT:    vse8.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: cttz_zero_undef_v16i8:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RVI-NEXT:    vle8.v v8, (a0)
+; RVI-NEXT:    li a1, 1
+; RVI-NEXT:    vsub.vx v9, v8, a1
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vand.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 1
+; RVI-NEXT:    li a1, 85
+; RVI-NEXT:    vand.vx v9, v9, a1
+; RVI-NEXT:    vsub.vv v8, v8, v9
+; RVI-NEXT:    li a1, 51
+; RVI-NEXT:    vand.vx v9, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v9, v8
+; RVI-NEXT:    vsrl.vi v9, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v9
+; RVI-NEXT:    vand.vi v8, v8, 15
+; RVI-NEXT:    vse8.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: cttz_zero_undef_v16i8:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RVF-NEXT:    vle8.v v8, (a0)
+; RVF-NEXT:    vrsub.vi v9, v8, 0
+; RVF-NEXT:    vand.vv v8, v8, v9
+; RVF-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RVF-NEXT:    vzext.vf2 v10, v8
+; RVF-NEXT:    vfwcvt.f.xu.v v12, v10
+; RVF-NEXT:    vnsrl.wi v8, v12, 23
+; RVF-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; RVF-NEXT:    vnsrl.wi v10, v8, 0
+; RVF-NEXT:    li a1, 127
+; RVF-NEXT:    vsub.vx v8, v10, a1
+; RVF-NEXT:    vse8.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: cttz_zero_undef_v16i8:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RVD-NEXT:    vle8.v v8, (a0)
+; RVD-NEXT:    vrsub.vi v9, v8, 0
+; RVD-NEXT:    vand.vv v8, v8, v9
+; RVD-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RVD-NEXT:    vzext.vf2 v10, v8
+; RVD-NEXT:    vfwcvt.f.xu.v v12, v10
+; RVD-NEXT:    vnsrl.wi v8, v12, 23
+; RVD-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; RVD-NEXT:    vnsrl.wi v10, v8, 0
+; RVD-NEXT:    li a1, 127
+; RVD-NEXT:    vsub.vx v8, v10, a1
+; RVD-NEXT:    vse8.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: cttz_zero_undef_v16i8:
 ; ZVBB:       # %bb.0:
@@ -1252,160 +879,61 @@ define void @cttz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
 }
 
 define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v8i16:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    li a1, 1
-; LMULMAX2-RV32I-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 5
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 3
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 1
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    li a1, 257
-; LMULMAX2-RV32I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV32I-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v8i16:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    li a1, 1
-; LMULMAX2-RV64I-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 5
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 3
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 1
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    li a1, 257
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-RV64I-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX1-LABEL: cttz_zero_undef_v8i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-NEXT:    li a1, 1
-; LMULMAX1-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX1-NEXT:    vnot.v v8, v8
-; LMULMAX1-NEXT:    vand.vv v8, v8, v9
-; LMULMAX1-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-NEXT:    lui a1, 5
-; LMULMAX1-NEXT:    addi a1, a1, 1365
-; LMULMAX1-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-NEXT:    lui a1, 3
-; LMULMAX1-NEXT:    addi a1, a1, 819
-; LMULMAX1-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-NEXT:    lui a1, 1
-; LMULMAX1-NEXT:    addi a1, a1, -241
-; LMULMAX1-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-NEXT:    li a1, 257
-; LMULMAX1-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: cttz_zero_undef_v8i16:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV32F-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32F-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV32F-NEXT:    vnsrl.wi v8, v10, 23
-; LMULMAX2-RV32F-NEXT:    li a1, 127
-; LMULMAX2-RV32F-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX2-RV32F-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: cttz_zero_undef_v8i16:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV64F-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV64F-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV64F-NEXT:    vnsrl.wi v8, v10, 23
-; LMULMAX2-RV64F-NEXT:    li a1, 127
-; LMULMAX2-RV64F-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX2-RV64F-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: cttz_zero_undef_v8i16:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV32D-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32D-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV32D-NEXT:    vnsrl.wi v8, v10, 23
-; LMULMAX2-RV32D-NEXT:    li a1, 127
-; LMULMAX2-RV32D-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: cttz_zero_undef_v8i16:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV64D-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV64D-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV64D-NEXT:    vnsrl.wi v8, v10, 23
-; LMULMAX2-RV64D-NEXT:    li a1, 127
-; LMULMAX2-RV64D-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: cttz_zero_undef_v8i16:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX8-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX8-NEXT:    vand.vv v8, v8, v9
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX8-NEXT:    vnsrl.wi v8, v10, 23
-; LMULMAX8-NEXT:    li a1, 127
-; LMULMAX8-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: cttz_zero_undef_v8i16:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RVI-NEXT:    vle16.v v8, (a0)
+; RVI-NEXT:    li a1, 1
+; RVI-NEXT:    vsub.vx v9, v8, a1
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vand.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 1
+; RVI-NEXT:    lui a1, 5
+; RVI-NEXT:    addi a1, a1, 1365
+; RVI-NEXT:    vand.vx v9, v9, a1
+; RVI-NEXT:    vsub.vv v8, v8, v9
+; RVI-NEXT:    lui a1, 3
+; RVI-NEXT:    addi a1, a1, 819
+; RVI-NEXT:    vand.vx v9, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v9, v8
+; RVI-NEXT:    vsrl.vi v9, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v9
+; RVI-NEXT:    lui a1, 1
+; RVI-NEXT:    addi a1, a1, -241
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    li a1, 257
+; RVI-NEXT:    vmul.vx v8, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 8
+; RVI-NEXT:    vse16.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: cttz_zero_undef_v8i16:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RVF-NEXT:    vle16.v v8, (a0)
+; RVF-NEXT:    vrsub.vi v9, v8, 0
+; RVF-NEXT:    vand.vv v8, v8, v9
+; RVF-NEXT:    vfwcvt.f.xu.v v10, v8
+; RVF-NEXT:    vnsrl.wi v8, v10, 23
+; RVF-NEXT:    li a1, 127
+; RVF-NEXT:    vsub.vx v8, v8, a1
+; RVF-NEXT:    vse16.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: cttz_zero_undef_v8i16:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RVD-NEXT:    vle16.v v8, (a0)
+; RVD-NEXT:    vrsub.vi v9, v8, 0
+; RVD-NEXT:    vand.vv v8, v8, v9
+; RVD-NEXT:    vfwcvt.f.xu.v v10, v8
+; RVD-NEXT:    vnsrl.wi v8, v10, 23
+; RVD-NEXT:    li a1, 127
+; RVD-NEXT:    vsub.vx v8, v8, a1
+; RVD-NEXT:    vse16.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: cttz_zero_undef_v8i16:
 ; ZVBB:       # %bb.0:
@@ -1422,139 +950,65 @@ define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
 }
 
 define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v4i32:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    li a1, 1
-; LMULMAX2-RV32I-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV32I-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v4i32:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    li a1, 1
-; LMULMAX2-RV64I-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV64I-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: cttz_zero_undef_v4i32:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV32F-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32F-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV32F-NEXT:    fsrm a1
-; LMULMAX2-RV32F-NEXT:    vsrl.vi v8, v8, 23
-; LMULMAX2-RV32F-NEXT:    li a1, 127
-; LMULMAX2-RV32F-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX2-RV32F-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: cttz_zero_undef_v4i32:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV64F-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV64F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64F-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV64F-NEXT:    fsrm a1
-; LMULMAX2-RV64F-NEXT:    vsrl.vi v8, v8, 23
-; LMULMAX2-RV64F-NEXT:    li a1, 127
-; LMULMAX2-RV64F-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX2-RV64F-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: cttz_zero_undef_v4i32:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV32D-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32D-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV32D-NEXT:    li a1, 52
-; LMULMAX2-RV32D-NEXT:    vnsrl.wx v8, v10, a1
-; LMULMAX2-RV32D-NEXT:    li a1, 1023
-; LMULMAX2-RV32D-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: cttz_zero_undef_v4i32:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV64D-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV64D-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX2-RV64D-NEXT:    li a1, 52
-; LMULMAX2-RV64D-NEXT:    vnsrl.wx v8, v10, a1
-; LMULMAX2-RV64D-NEXT:    li a1, 1023
-; LMULMAX2-RV64D-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: cttz_zero_undef_v4i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX8-NEXT:    vand.vv v8, v8, v9
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX8-NEXT:    li a1, 52
-; LMULMAX8-NEXT:    vnsrl.wx v8, v10, a1
-; LMULMAX8-NEXT:    li a1, 1023
-; LMULMAX8-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: cttz_zero_undef_v4i32:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RVI-NEXT:    vle32.v v8, (a0)
+; RVI-NEXT:    li a1, 1
+; RVI-NEXT:    vsub.vx v9, v8, a1
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vand.vv v8, v8, v9
+; RVI-NEXT:    vsrl.vi v9, v8, 1
+; RVI-NEXT:    lui a1, 349525
+; RVI-NEXT:    addi a1, a1, 1365
+; RVI-NEXT:    vand.vx v9, v9, a1
+; RVI-NEXT:    vsub.vv v8, v8, v9
+; RVI-NEXT:    lui a1, 209715
+; RVI-NEXT:    addi a1, a1, 819
+; RVI-NEXT:    vand.vx v9, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v9, v8
+; RVI-NEXT:    vsrl.vi v9, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v9
+; RVI-NEXT:    lui a1, 61681
+; RVI-NEXT:    addi a1, a1, -241
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    lui a1, 4112
+; RVI-NEXT:    addi a1, a1, 257
+; RVI-NEXT:    vmul.vx v8, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 24
+; RVI-NEXT:    vse32.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: cttz_zero_undef_v4i32:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RVF-NEXT:    vle32.v v8, (a0)
+; RVF-NEXT:    vrsub.vi v9, v8, 0
+; RVF-NEXT:    vand.vv v8, v8, v9
+; RVF-NEXT:    fsrmi a1, 1
+; RVF-NEXT:    vfcvt.f.xu.v v8, v8
+; RVF-NEXT:    fsrm a1
+; RVF-NEXT:    vsrl.vi v8, v8, 23
+; RVF-NEXT:    li a1, 127
+; RVF-NEXT:    vsub.vx v8, v8, a1
+; RVF-NEXT:    vse32.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: cttz_zero_undef_v4i32:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RVD-NEXT:    vle32.v v8, (a0)
+; RVD-NEXT:    vrsub.vi v9, v8, 0
+; RVD-NEXT:    vand.vv v8, v8, v9
+; RVD-NEXT:    vfwcvt.f.xu.v v10, v8
+; RVD-NEXT:    li a1, 52
+; RVD-NEXT:    vnsrl.wx v8, v10, a1
+; RVD-NEXT:    li a1, 1023
+; RVD-NEXT:    vsub.vx v8, v8, a1
+; RVD-NEXT:    vse32.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: cttz_zero_undef_v4i32:
 ; ZVBB:       # %bb.0:
@@ -1571,171 +1025,139 @@ define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
 }
 
 define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v2i64:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    li a1, 1
-; LMULMAX2-RV32I-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v9, v9, v10
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v10, v8, v9
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v9, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT:    li a1, 56
-; LMULMAX2-RV32I-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v2i64:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    li a1, 1
-; LMULMAX2-RV64I-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 257
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    li a1, 56
-; LMULMAX2-RV64I-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: cttz_zero_undef_v2i64:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV32F-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX2-RV32F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32F-NEXT:    vfncvt.f.xu.w v9, v8
-; LMULMAX2-RV32F-NEXT:    fsrm a1
-; LMULMAX2-RV32F-NEXT:    vsrl.vi v8, v9, 23
-; LMULMAX2-RV32F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; LMULMAX2-RV32F-NEXT:    vzext.vf2 v9, v8
-; LMULMAX2-RV32F-NEXT:    li a1, 127
-; LMULMAX2-RV32F-NEXT:    vsub.vx v8, v9, a1
-; LMULMAX2-RV32F-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: cttz_zero_undef_v2i64:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV64F-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV64F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX2-RV64F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64F-NEXT:    vfncvt.f.xu.w v9, v8
-; LMULMAX2-RV64F-NEXT:    fsrm a1
-; LMULMAX2-RV64F-NEXT:    vsrl.vi v8, v9, 23
-; LMULMAX2-RV64F-NEXT:    li a1, 127
-; LMULMAX2-RV64F-NEXT:    vwsubu.vx v9, v8, a1
-; LMULMAX2-RV64F-NEXT:    vse64.v v9, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: cttz_zero_undef_v2i64:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV32D-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32D-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV32D-NEXT:    fsrm a1
-; LMULMAX2-RV32D-NEXT:    li a1, 52
-; LMULMAX2-RV32D-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    li a1, 1023
-; LMULMAX2-RV32D-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: cttz_zero_undef_v2i64:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX2-RV64D-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV64D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64D-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV64D-NEXT:    fsrm a1
-; LMULMAX2-RV64D-NEXT:    li a1, 52
-; LMULMAX2-RV64D-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    li a1, 1023
-; LMULMAX2-RV64D-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: cttz_zero_undef_v2i64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX8-NEXT:    vle64.v v8, (a0)
-; LMULMAX8-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX8-NEXT:    vand.vv v8, v8, v9
-; LMULMAX8-NEXT:    fsrmi a1, 1
-; LMULMAX8-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX8-NEXT:    fsrm a1
-; LMULMAX8-NEXT:    li a1, 52
-; LMULMAX8-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX8-NEXT:    li a1, 1023
-; LMULMAX8-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse64.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RV32I-LABEL: cttz_zero_undef_v2i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vle64.v v8, (a0)
+; RV32I-NEXT:    li a1, 1
+; RV32I-NEXT:    vsub.vx v9, v8, a1
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vand.vv v8, v8, v9
+; RV32I-NEXT:    vsrl.vi v9, v8, 1
+; RV32I-NEXT:    lui a1, 349525
+; RV32I-NEXT:    addi a1, a1, 1365
+; RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32I-NEXT:    vmv.v.x v10, a1
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vand.vv v9, v9, v10
+; RV32I-NEXT:    vsub.vv v8, v8, v9
+; RV32I-NEXT:    lui a1, 209715
+; RV32I-NEXT:    addi a1, a1, 819
+; RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32I-NEXT:    vmv.v.x v9, a1
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vand.vv v10, v8, v9
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v9
+; RV32I-NEXT:    vadd.vv v8, v10, v8
+; RV32I-NEXT:    vsrl.vi v9, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v9
+; RV32I-NEXT:    lui a1, 61681
+; RV32I-NEXT:    addi a1, a1, -241
+; RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32I-NEXT:    vmv.v.x v9, a1
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vand.vv v8, v8, v9
+; RV32I-NEXT:    lui a1, 4112
+; RV32I-NEXT:    addi a1, a1, 257
+; RV32I-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32I-NEXT:    vmv.v.x v9, a1
+; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vmul.vv v8, v8, v9
+; RV32I-NEXT:    li a1, 56
+; RV32I-NEXT:    vsrl.vx v8, v8, a1
+; RV32I-NEXT:    vse64.v v8, (a0)
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: cttz_zero_undef_v2i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64I-NEXT:    vle64.v v8, (a0)
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    vsub.vx v9, v8, a1
+; RV64I-NEXT:    vnot.v v8, v8
+; RV64I-NEXT:    vand.vv v8, v8, v9
+; RV64I-NEXT:    vsrl.vi v9, v8, 1
+; RV64I-NEXT:    lui a1, 349525
+; RV64I-NEXT:    addiw a1, a1, 1365
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v9, v9, a1
+; RV64I-NEXT:    vsub.vv v8, v8, v9
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v9, v8, a1
+; RV64I-NEXT:    vsrl.vi v8, v8, 2
+; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    vadd.vv v8, v9, v8
+; RV64I-NEXT:    vsrl.vi v9, v8, 4
+; RV64I-NEXT:    vadd.vv v8, v8, v9
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vmul.vx v8, v8, a1
+; RV64I-NEXT:    li a1, 56
+; RV64I-NEXT:    vsrl.vx v8, v8, a1
+; RV64I-NEXT:    vse64.v v8, (a0)
+; RV64I-NEXT:    ret
+;
+; RV32F-LABEL: cttz_zero_undef_v2i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32F-NEXT:    vle64.v v8, (a0)
+; RV32F-NEXT:    vrsub.vi v9, v8, 0
+; RV32F-NEXT:    vand.vv v8, v8, v9
+; RV32F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV32F-NEXT:    fsrmi a1, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v9, v8
+; RV32F-NEXT:    fsrm a1
+; RV32F-NEXT:    vsrl.vi v8, v9, 23
+; RV32F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; RV32F-NEXT:    vzext.vf2 v9, v8
+; RV32F-NEXT:    li a1, 127
+; RV32F-NEXT:    vsub.vx v8, v9, a1
+; RV32F-NEXT:    vse64.v v8, (a0)
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: cttz_zero_undef_v2i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64F-NEXT:    vle64.v v8, (a0)
+; RV64F-NEXT:    vrsub.vi v9, v8, 0
+; RV64F-NEXT:    vand.vv v8, v8, v9
+; RV64F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV64F-NEXT:    fsrmi a1, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v9, v8
+; RV64F-NEXT:    fsrm a1
+; RV64F-NEXT:    vsrl.vi v8, v9, 23
+; RV64F-NEXT:    li a1, 127
+; RV64F-NEXT:    vwsubu.vx v9, v8, a1
+; RV64F-NEXT:    vse64.v v9, (a0)
+; RV64F-NEXT:    ret
+;
+; RVD-LABEL: cttz_zero_undef_v2i64:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RVD-NEXT:    vle64.v v8, (a0)
+; RVD-NEXT:    vrsub.vi v9, v8, 0
+; RVD-NEXT:    vand.vv v8, v8, v9
+; RVD-NEXT:    fsrmi a1, 1
+; RVD-NEXT:    vfcvt.f.xu.v v8, v8
+; RVD-NEXT:    fsrm a1
+; RVD-NEXT:    li a1, 52
+; RVD-NEXT:    vsrl.vx v8, v8, a1
+; RVD-NEXT:    li a1, 1023
+; RVD-NEXT:    vsub.vx v8, v8, a1
+; RVD-NEXT:    vse64.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: cttz_zero_undef_v2i64:
 ; ZVBB:       # %bb.0:
@@ -1752,86 +1174,65 @@ define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
 }
 
 define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
-; LMULMAX2-LABEL: cttz_zero_undef_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a1, 32
-; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    li a1, 1
-; LMULMAX2-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-NEXT:    vnot.v v8, v8
-; LMULMAX2-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-NEXT:    li a1, 85
-; LMULMAX2-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    li a1, 51
-; LMULMAX2-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    vand.vi v8, v8, 15
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: cttz_zero_undef_v32i8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle8.v v8, (a1)
-; LMULMAX1-NEXT:    vle8.v v9, (a0)
-; LMULMAX1-NEXT:    li a2, 1
-; LMULMAX1-NEXT:    vsub.vx v10, v8, a2
-; LMULMAX1-NEXT:    vnot.v v8, v8
-; LMULMAX1-NEXT:    vand.vv v8, v8, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-NEXT:    li a3, 85
-; LMULMAX1-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-NEXT:    li a4, 51
-; LMULMAX1-NEXT:    vand.vx v10, v8, a4
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-NEXT:    vand.vi v8, v8, 15
-; LMULMAX1-NEXT:    vsub.vx v10, v9, a2
-; LMULMAX1-NEXT:    vnot.v v9, v9
-; LMULMAX1-NEXT:    vand.vv v9, v9, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vx v10, v9, a4
-; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vi v9, v9, 15
-; LMULMAX1-NEXT:    vse8.v v9, (a0)
-; LMULMAX1-NEXT:    vse8.v v8, (a1)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: cttz_zero_undef_v32i8:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a1, 32
-; LMULMAX8-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX8-NEXT:    vle8.v v8, (a0)
-; LMULMAX8-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX8-NEXT:    vand.vv v8, v8, v10
-; LMULMAX8-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; LMULMAX8-NEXT:    vzext.vf2 v12, v8
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v16, v12
-; LMULMAX8-NEXT:    vnsrl.wi v8, v16, 23
-; LMULMAX8-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; LMULMAX8-NEXT:    vnsrl.wi v12, v8, 0
-; LMULMAX8-NEXT:    li a1, 127
-; LMULMAX8-NEXT:    vsub.vx v8, v12, a1
-; LMULMAX8-NEXT:    vse8.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: cttz_zero_undef_v32i8:
+; RVI:       # %bb.0:
+; RVI-NEXT:    li a1, 32
+; RVI-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RVI-NEXT:    vle8.v v8, (a0)
+; RVI-NEXT:    li a1, 1
+; RVI-NEXT:    vsub.vx v10, v8, a1
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vand.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 1
+; RVI-NEXT:    li a1, 85
+; RVI-NEXT:    vand.vx v10, v10, a1
+; RVI-NEXT:    vsub.vv v8, v8, v10
+; RVI-NEXT:    li a1, 51
+; RVI-NEXT:    vand.vx v10, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v10, v8
+; RVI-NEXT:    vsrl.vi v10, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v10
+; RVI-NEXT:    vand.vi v8, v8, 15
+; RVI-NEXT:    vse8.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: cttz_zero_undef_v32i8:
+; RVF:       # %bb.0:
+; RVF-NEXT:    li a1, 32
+; RVF-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RVF-NEXT:    vle8.v v8, (a0)
+; RVF-NEXT:    vrsub.vi v10, v8, 0
+; RVF-NEXT:    vand.vv v8, v8, v10
+; RVF-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; RVF-NEXT:    vzext.vf2 v12, v8
+; RVF-NEXT:    vfwcvt.f.xu.v v16, v12
+; RVF-NEXT:    vnsrl.wi v8, v16, 23
+; RVF-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; RVF-NEXT:    vnsrl.wi v12, v8, 0
+; RVF-NEXT:    li a1, 127
+; RVF-NEXT:    vsub.vx v8, v12, a1
+; RVF-NEXT:    vse8.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: cttz_zero_undef_v32i8:
+; RVD:       # %bb.0:
+; RVD-NEXT:    li a1, 32
+; RVD-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RVD-NEXT:    vle8.v v8, (a0)
+; RVD-NEXT:    vrsub.vi v10, v8, 0
+; RVD-NEXT:    vand.vv v8, v8, v10
+; RVD-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; RVD-NEXT:    vzext.vf2 v12, v8
+; RVD-NEXT:    vfwcvt.f.xu.v v16, v12
+; RVD-NEXT:    vnsrl.wi v8, v16, 23
+; RVD-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; RVD-NEXT:    vnsrl.wi v12, v8, 0
+; RVD-NEXT:    li a1, 127
+; RVD-NEXT:    vsub.vx v8, v12, a1
+; RVD-NEXT:    vse8.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: cttz_zero_undef_v32i8:
 ; ZVBB:       # %bb.0:
@@ -1849,96 +1250,61 @@ define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
 }
 
 define void @cttz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
-; LMULMAX2-LABEL: cttz_zero_undef_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    li a1, 1
-; LMULMAX2-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-NEXT:    vnot.v v8, v8
-; LMULMAX2-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-NEXT:    lui a1, 5
-; LMULMAX2-NEXT:    addi a1, a1, 1365
-; LMULMAX2-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    lui a1, 3
-; LMULMAX2-NEXT:    addi a1, a1, 819
-; LMULMAX2-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    lui a1, 1
-; LMULMAX2-NEXT:    addi a1, a1, -241
-; LMULMAX2-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-NEXT:    li a1, 257
-; LMULMAX2-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: cttz_zero_undef_v16i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-NEXT:    li a2, 1
-; LMULMAX1-NEXT:    vsub.vx v10, v8, a2
-; LMULMAX1-NEXT:    vnot.v v8, v8
-; LMULMAX1-NEXT:    vand.vv v8, v8, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-NEXT:    lui a3, 5
-; LMULMAX1-NEXT:    addi a3, a3, 1365
-; LMULMAX1-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-NEXT:    lui a4, 3
-; LMULMAX1-NEXT:    addi a4, a4, 819
-; LMULMAX1-NEXT:    vand.vx v10, v8, a4
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-NEXT:    lui a5, 1
-; LMULMAX1-NEXT:    addi a5, a5, -241
-; LMULMAX1-NEXT:    vand.vx v8, v8, a5
-; LMULMAX1-NEXT:    li a6, 257
-; LMULMAX1-NEXT:    vmul.vx v8, v8, a6
-; LMULMAX1-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-NEXT:    vsub.vx v10, v9, a2
-; LMULMAX1-NEXT:    vnot.v v9, v9
-; LMULMAX1-NEXT:    vand.vv v9, v9, v10
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vx v10, v9, a4
-; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-NEXT:    vand.vx v9, v9, a5
-; LMULMAX1-NEXT:    vmul.vx v9, v9, a6
-; LMULMAX1-NEXT:    vsrl.vi v9, v9, 8
-; LMULMAX1-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: cttz_zero_undef_v16i16:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX8-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX8-NEXT:    vand.vv v8, v8, v10
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v12, v8
-; LMULMAX8-NEXT:    vnsrl.wi v8, v12, 23
-; LMULMAX8-NEXT:    li a1, 127
-; LMULMAX8-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: cttz_zero_undef_v16i16:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RVI-NEXT:    vle16.v v8, (a0)
+; RVI-NEXT:    li a1, 1
+; RVI-NEXT:    vsub.vx v10, v8, a1
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vand.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 1
+; RVI-NEXT:    lui a1, 5
+; RVI-NEXT:    addi a1, a1, 1365
+; RVI-NEXT:    vand.vx v10, v10, a1
+; RVI-NEXT:    vsub.vv v8, v8, v10
+; RVI-NEXT:    lui a1, 3
+; RVI-NEXT:    addi a1, a1, 819
+; RVI-NEXT:    vand.vx v10, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v10, v8
+; RVI-NEXT:    vsrl.vi v10, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v10
+; RVI-NEXT:    lui a1, 1
+; RVI-NEXT:    addi a1, a1, -241
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    li a1, 257
+; RVI-NEXT:    vmul.vx v8, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 8
+; RVI-NEXT:    vse16.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: cttz_zero_undef_v16i16:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RVF-NEXT:    vle16.v v8, (a0)
+; RVF-NEXT:    vrsub.vi v10, v8, 0
+; RVF-NEXT:    vand.vv v8, v8, v10
+; RVF-NEXT:    vfwcvt.f.xu.v v12, v8
+; RVF-NEXT:    vnsrl.wi v8, v12, 23
+; RVF-NEXT:    li a1, 127
+; RVF-NEXT:    vsub.vx v8, v8, a1
+; RVF-NEXT:    vse16.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: cttz_zero_undef_v16i16:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RVD-NEXT:    vle16.v v8, (a0)
+; RVD-NEXT:    vrsub.vi v10, v8, 0
+; RVD-NEXT:    vand.vv v8, v8, v10
+; RVD-NEXT:    vfwcvt.f.xu.v v12, v8
+; RVD-NEXT:    vnsrl.wi v8, v12, 23
+; RVD-NEXT:    li a1, 127
+; RVD-NEXT:    vsub.vx v8, v8, a1
+; RVD-NEXT:    vse16.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: cttz_zero_undef_v16i16:
 ; ZVBB:       # %bb.0:
@@ -1955,141 +1321,65 @@ define void @cttz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
 }
 
 define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v8i32:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    li a1, 1
-; LMULMAX2-RV32I-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV32I-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v8i32:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    li a1, 1
-; LMULMAX2-RV64I-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX2-RV64I-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: cttz_zero_undef_v8i32:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-RV32F-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32F-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV32F-NEXT:    fsrm a1
-; LMULMAX2-RV32F-NEXT:    vsrl.vi v8, v8, 23
-; LMULMAX2-RV32F-NEXT:    li a1, 127
-; LMULMAX2-RV32F-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX2-RV32F-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: cttz_zero_undef_v8i32:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-RV64F-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV64F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64F-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV64F-NEXT:    fsrm a1
-; LMULMAX2-RV64F-NEXT:    vsrl.vi v8, v8, 23
-; LMULMAX2-RV64F-NEXT:    li a1, 127
-; LMULMAX2-RV64F-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX2-RV64F-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: cttz_zero_undef_v8i32:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-RV32D-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32D-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV32D-NEXT:    fsrm a1
-; LMULMAX2-RV32D-NEXT:    vsrl.vi v8, v8, 23
-; LMULMAX2-RV32D-NEXT:    li a1, 127
-; LMULMAX2-RV32D-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: cttz_zero_undef_v8i32:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-RV64D-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV64D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64D-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV64D-NEXT:    fsrm a1
-; LMULMAX2-RV64D-NEXT:    vsrl.vi v8, v8, 23
-; LMULMAX2-RV64D-NEXT:    li a1, 127
-; LMULMAX2-RV64D-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: cttz_zero_undef_v8i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX8-NEXT:    vand.vv v8, v8, v10
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v12, v8
-; LMULMAX8-NEXT:    li a1, 52
-; LMULMAX8-NEXT:    vnsrl.wx v8, v12, a1
-; LMULMAX8-NEXT:    li a1, 1023
-; LMULMAX8-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RVI-LABEL: cttz_zero_undef_v8i32:
+; RVI:       # %bb.0:
+; RVI-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RVI-NEXT:    vle32.v v8, (a0)
+; RVI-NEXT:    li a1, 1
+; RVI-NEXT:    vsub.vx v10, v8, a1
+; RVI-NEXT:    vnot.v v8, v8
+; RVI-NEXT:    vand.vv v8, v8, v10
+; RVI-NEXT:    vsrl.vi v10, v8, 1
+; RVI-NEXT:    lui a1, 349525
+; RVI-NEXT:    addi a1, a1, 1365
+; RVI-NEXT:    vand.vx v10, v10, a1
+; RVI-NEXT:    vsub.vv v8, v8, v10
+; RVI-NEXT:    lui a1, 209715
+; RVI-NEXT:    addi a1, a1, 819
+; RVI-NEXT:    vand.vx v10, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 2
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    vadd.vv v8, v10, v8
+; RVI-NEXT:    vsrl.vi v10, v8, 4
+; RVI-NEXT:    vadd.vv v8, v8, v10
+; RVI-NEXT:    lui a1, 61681
+; RVI-NEXT:    addi a1, a1, -241
+; RVI-NEXT:    vand.vx v8, v8, a1
+; RVI-NEXT:    lui a1, 4112
+; RVI-NEXT:    addi a1, a1, 257
+; RVI-NEXT:    vmul.vx v8, v8, a1
+; RVI-NEXT:    vsrl.vi v8, v8, 24
+; RVI-NEXT:    vse32.v v8, (a0)
+; RVI-NEXT:    ret
+;
+; RVF-LABEL: cttz_zero_undef_v8i32:
+; RVF:       # %bb.0:
+; RVF-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RVF-NEXT:    vle32.v v8, (a0)
+; RVF-NEXT:    vrsub.vi v10, v8, 0
+; RVF-NEXT:    vand.vv v8, v8, v10
+; RVF-NEXT:    fsrmi a1, 1
+; RVF-NEXT:    vfcvt.f.xu.v v8, v8
+; RVF-NEXT:    fsrm a1
+; RVF-NEXT:    vsrl.vi v8, v8, 23
+; RVF-NEXT:    li a1, 127
+; RVF-NEXT:    vsub.vx v8, v8, a1
+; RVF-NEXT:    vse32.v v8, (a0)
+; RVF-NEXT:    ret
+;
+; RVD-LABEL: cttz_zero_undef_v8i32:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RVD-NEXT:    vle32.v v8, (a0)
+; RVD-NEXT:    vrsub.vi v10, v8, 0
+; RVD-NEXT:    vand.vv v8, v8, v10
+; RVD-NEXT:    vfwcvt.f.xu.v v12, v8
+; RVD-NEXT:    li a1, 52
+; RVD-NEXT:    vnsrl.wx v8, v12, a1
+; RVD-NEXT:    li a1, 1023
+; RVD-NEXT:    vsub.vx v8, v8, a1
+; RVD-NEXT:    vse32.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: cttz_zero_undef_v8i32:
 ; ZVBB:       # %bb.0:
@@ -2106,171 +1396,139 @@ define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
 }
 
 define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
-; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v4i64:
-; LMULMAX2-RV32I:       # %bb.0:
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    li a1, 1
-; LMULMAX2-RV32I-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-RV32I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32I-NEXT:    lui a1, 349525
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v12, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v10, v10, v12
-; LMULMAX2-RV32I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 209715
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v12, v8, v10
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v12, v8
-; LMULMAX2-RV32I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32I-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 61681
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    lui a1, 4112
-; LMULMAX2-RV32I-NEXT:    addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32I-NEXT:    vmul.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT:    li a1, 56
-; LMULMAX2-RV32I-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV32I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT:    ret
-;
-; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v4i64:
-; LMULMAX2-RV64I:       # %bb.0:
-; LMULMAX2-RV64I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64I-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    li a1, 1
-; LMULMAX2-RV64I-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-RV64I-NEXT:    vnot.v v8, v8
-; LMULMAX2-RV64I-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64I-NEXT:    lui a1, 349525
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64I-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    lui a1, 209715
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 819
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV64I-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64I-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64I-NEXT:    lui a1, 61681
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    lui a1, 4112
-; LMULMAX2-RV64I-NEXT:    addiw a1, a1, 257
-; LMULMAX2-RV64I-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64I-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64I-NEXT:    vmul.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    li a1, 56
-; LMULMAX2-RV64I-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV64I-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64I-NEXT:    ret
-;
-; LMULMAX2-RV32F-LABEL: cttz_zero_undef_v4i64:
-; LMULMAX2-RV32F:       # %bb.0:
-; LMULMAX2-RV32F-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32F-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-RV32F-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; LMULMAX2-RV32F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32F-NEXT:    vfncvt.f.xu.w v10, v8
-; LMULMAX2-RV32F-NEXT:    fsrm a1
-; LMULMAX2-RV32F-NEXT:    vsrl.vi v8, v10, 23
-; LMULMAX2-RV32F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; LMULMAX2-RV32F-NEXT:    vzext.vf2 v10, v8
-; LMULMAX2-RV32F-NEXT:    li a1, 127
-; LMULMAX2-RV32F-NEXT:    vsub.vx v8, v10, a1
-; LMULMAX2-RV32F-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32F-NEXT:    ret
-;
-; LMULMAX2-RV64F-LABEL: cttz_zero_undef_v4i64:
-; LMULMAX2-RV64F:       # %bb.0:
-; LMULMAX2-RV64F-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64F-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64F-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-RV64F-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV64F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; LMULMAX2-RV64F-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64F-NEXT:    vfncvt.f.xu.w v10, v8
-; LMULMAX2-RV64F-NEXT:    fsrm a1
-; LMULMAX2-RV64F-NEXT:    vsrl.vi v8, v10, 23
-; LMULMAX2-RV64F-NEXT:    li a1, 127
-; LMULMAX2-RV64F-NEXT:    vwsubu.vx v10, v8, a1
-; LMULMAX2-RV64F-NEXT:    vse64.v v10, (a0)
-; LMULMAX2-RV64F-NEXT:    ret
-;
-; LMULMAX2-RV32D-LABEL: cttz_zero_undef_v4i64:
-; LMULMAX2-RV32D:       # %bb.0:
-; LMULMAX2-RV32D-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32D-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-RV32D-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV32D-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV32D-NEXT:    fsrm a1
-; LMULMAX2-RV32D-NEXT:    li a1, 52
-; LMULMAX2-RV32D-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    li a1, 1023
-; LMULMAX2-RV32D-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX2-RV32D-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32D-NEXT:    ret
-;
-; LMULMAX2-RV64D-LABEL: cttz_zero_undef_v4i64:
-; LMULMAX2-RV64D:       # %bb.0:
-; LMULMAX2-RV64D-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64D-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX2-RV64D-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV64D-NEXT:    fsrmi a1, 1
-; LMULMAX2-RV64D-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX2-RV64D-NEXT:    fsrm a1
-; LMULMAX2-RV64D-NEXT:    li a1, 52
-; LMULMAX2-RV64D-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    li a1, 1023
-; LMULMAX2-RV64D-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX2-RV64D-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64D-NEXT:    ret
-;
-; LMULMAX8-LABEL: cttz_zero_undef_v4i64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-NEXT:    vle64.v v8, (a0)
-; LMULMAX8-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX8-NEXT:    vand.vv v8, v8, v10
-; LMULMAX8-NEXT:    fsrmi a1, 1
-; LMULMAX8-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX8-NEXT:    fsrm a1
-; LMULMAX8-NEXT:    li a1, 52
-; LMULMAX8-NEXT:    vsrl.vx v8, v8, a1
-; LMULMAX8-NEXT:    li a1, 1023
-; LMULMAX8-NEXT:    vsub.vx v8, v8, a1
-; LMULMAX8-NEXT:    vse64.v v8, (a0)
-; LMULMAX8-NEXT:    ret
+; RV32I-LABEL: cttz_zero_undef_v4i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vle64.v v8, (a0)
+; RV32I-NEXT:    li a1, 1
+; RV32I-NEXT:    vsub.vx v10, v8, a1
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vand.vv v8, v8, v10
+; RV32I-NEXT:    vsrl.vi v10, v8, 1
+; RV32I-NEXT:    lui a1, 349525
+; RV32I-NEXT:    addi a1, a1, 1365
+; RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32I-NEXT:    vmv.v.x v12, a1
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vand.vv v10, v10, v12
+; RV32I-NEXT:    vsub.vv v8, v8, v10
+; RV32I-NEXT:    lui a1, 209715
+; RV32I-NEXT:    addi a1, a1, 819
+; RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32I-NEXT:    vmv.v.x v10, a1
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vand.vv v12, v8, v10
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v10
+; RV32I-NEXT:    vadd.vv v8, v12, v8
+; RV32I-NEXT:    vsrl.vi v10, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v10
+; RV32I-NEXT:    lui a1, 61681
+; RV32I-NEXT:    addi a1, a1, -241
+; RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32I-NEXT:    vmv.v.x v10, a1
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vand.vv v8, v8, v10
+; RV32I-NEXT:    lui a1, 4112
+; RV32I-NEXT:    addi a1, a1, 257
+; RV32I-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; RV32I-NEXT:    vmv.v.x v10, a1
+; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vmul.vv v8, v8, v10
+; RV32I-NEXT:    li a1, 56
+; RV32I-NEXT:    vsrl.vx v8, v8, a1
+; RV32I-NEXT:    vse64.v v8, (a0)
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: cttz_zero_undef_v4i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64I-NEXT:    vle64.v v8, (a0)
+; RV64I-NEXT:    li a1, 1
+; RV64I-NEXT:    vsub.vx v10, v8, a1
+; RV64I-NEXT:    vnot.v v8, v8
+; RV64I-NEXT:    vand.vv v8, v8, v10
+; RV64I-NEXT:    vsrl.vi v10, v8, 1
+; RV64I-NEXT:    lui a1, 349525
+; RV64I-NEXT:    addiw a1, a1, 1365
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v10, v10, a1
+; RV64I-NEXT:    vsub.vv v8, v8, v10
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v10, v8, a1
+; RV64I-NEXT:    vsrl.vi v8, v8, 2
+; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    vadd.vv v8, v10, v8
+; RV64I-NEXT:    vsrl.vi v10, v8, 4
+; RV64I-NEXT:    vadd.vv v8, v8, v10
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    slli a2, a1, 32
+; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vmul.vx v8, v8, a1
+; RV64I-NEXT:    li a1, 56
+; RV64I-NEXT:    vsrl.vx v8, v8, a1
+; RV64I-NEXT:    vse64.v v8, (a0)
+; RV64I-NEXT:    ret
+;
+; RV32F-LABEL: cttz_zero_undef_v4i64:
+; RV32F:       # %bb.0:
+; RV32F-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32F-NEXT:    vle64.v v8, (a0)
+; RV32F-NEXT:    vrsub.vi v10, v8, 0
+; RV32F-NEXT:    vand.vv v8, v8, v10
+; RV32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; RV32F-NEXT:    fsrmi a1, 1
+; RV32F-NEXT:    vfncvt.f.xu.w v10, v8
+; RV32F-NEXT:    fsrm a1
+; RV32F-NEXT:    vsrl.vi v8, v10, 23
+; RV32F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; RV32F-NEXT:    vzext.vf2 v10, v8
+; RV32F-NEXT:    li a1, 127
+; RV32F-NEXT:    vsub.vx v8, v10, a1
+; RV32F-NEXT:    vse64.v v8, (a0)
+; RV32F-NEXT:    ret
+;
+; RV64F-LABEL: cttz_zero_undef_v4i64:
+; RV64F:       # %bb.0:
+; RV64F-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64F-NEXT:    vle64.v v8, (a0)
+; RV64F-NEXT:    vrsub.vi v10, v8, 0
+; RV64F-NEXT:    vand.vv v8, v8, v10
+; RV64F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; RV64F-NEXT:    fsrmi a1, 1
+; RV64F-NEXT:    vfncvt.f.xu.w v10, v8
+; RV64F-NEXT:    fsrm a1
+; RV64F-NEXT:    vsrl.vi v8, v10, 23
+; RV64F-NEXT:    li a1, 127
+; RV64F-NEXT:    vwsubu.vx v10, v8, a1
+; RV64F-NEXT:    vse64.v v10, (a0)
+; RV64F-NEXT:    ret
+;
+; RVD-LABEL: cttz_zero_undef_v4i64:
+; RVD:       # %bb.0:
+; RVD-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RVD-NEXT:    vle64.v v8, (a0)
+; RVD-NEXT:    vrsub.vi v10, v8, 0
+; RVD-NEXT:    vand.vv v8, v8, v10
+; RVD-NEXT:    fsrmi a1, 1
+; RVD-NEXT:    vfcvt.f.xu.v v8, v8
+; RVD-NEXT:    fsrm a1
+; RVD-NEXT:    li a1, 52
+; RVD-NEXT:    vsrl.vx v8, v8, a1
+; RVD-NEXT:    li a1, 1023
+; RVD-NEXT:    vsub.vx v8, v8, a1
+; RVD-NEXT:    vse64.v v8, (a0)
+; RVD-NEXT:    ret
 ;
 ; ZVBB-LABEL: cttz_zero_undef_v4i64:
 ; ZVBB:       # %bb.0:
@@ -2286,7 +1544,5 @@ define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
   ret void
 }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; LMULMAX1-RV32: {{.*}}
-; LMULMAX1-RV64: {{.*}}
-; LMULMAX2-RV32: {{.*}}
-; LMULMAX2-RV64: {{.*}}
+; RV32D: {{.*}}
+; RV64D: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
index 4aaefb24d5aa27..369f90521cf00a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
 
 define <2 x i16> @sextload_v2i1_v2i16(ptr %x) {
 ; CHECK-LABEL: sextload_v2i1_v2i16:
@@ -138,46 +136,24 @@ define <4 x i32> @zextload_v4i8_v4i32(ptr %x) {
 }
 
 define <4 x i64> @sextload_v4i8_v4i64(ptr %x) {
-; LMULMAX1-LABEL: sextload_v4i8_v4i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vle8.v v10, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v10, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf8 v9, v8
-; LMULMAX1-NEXT:    vsext.vf8 v8, v10
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: sextload_v4i8_v4i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX4-NEXT:    vle8.v v10, (a0)
-; LMULMAX4-NEXT:    vsext.vf8 v8, v10
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: sextload_v4i8_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vsext.vf8 v8, v10
+; CHECK-NEXT:    ret
   %y = load <4 x i8>, ptr %x
   %z = sext <4 x i8> %y to <4 x i64>
   ret <4 x i64> %z
 }
 
 define <4 x i64> @zextload_v4i8_v4i64(ptr %x) {
-; LMULMAX1-LABEL: zextload_v4i8_v4i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vle8.v v10, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v10, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf8 v9, v8
-; LMULMAX1-NEXT:    vzext.vf8 v8, v10
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: zextload_v4i8_v4i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX4-NEXT:    vle8.v v10, (a0)
-; LMULMAX4-NEXT:    vzext.vf8 v8, v10
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: zextload_v4i8_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vzext.vf8 v8, v10
+; CHECK-NEXT:    ret
   %y = load <4 x i8>, ptr %x
   %z = zext <4 x i8> %y to <4 x i64>
   ret <4 x i64> %z
@@ -208,324 +184,120 @@ define <8 x i16> @zextload_v8i8_v8i16(ptr %x) {
 }
 
 define <8 x i32> @sextload_v8i8_v8i32(ptr %x) {
-; LMULMAX1-LABEL: sextload_v8i8_v8i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vle8.v v10, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v10, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v9, v8
-; LMULMAX1-NEXT:    vsext.vf4 v8, v10
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: sextload_v8i8_v8i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX4-NEXT:    vle8.v v10, (a0)
-; LMULMAX4-NEXT:    vsext.vf4 v8, v10
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: sextload_v8i8_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vsext.vf4 v8, v10
+; CHECK-NEXT:    ret
   %y = load <8 x i8>, ptr %x
   %z = sext <8 x i8> %y to <8 x i32>
   ret <8 x i32> %z
 }
 
 define <8 x i32> @zextload_v8i8_v8i32(ptr %x) {
-; LMULMAX1-LABEL: zextload_v8i8_v8i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vle8.v v10, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v10, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf4 v9, v8
-; LMULMAX1-NEXT:    vzext.vf4 v8, v10
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: zextload_v8i8_v8i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX4-NEXT:    vle8.v v10, (a0)
-; LMULMAX4-NEXT:    vzext.vf4 v8, v10
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: zextload_v8i8_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vzext.vf4 v8, v10
+; CHECK-NEXT:    ret
   %y = load <8 x i8>, ptr %x
   %z = zext <8 x i8> %y to <8 x i32>
   ret <8 x i32> %z
 }
 
 define <8 x i64> @sextload_v8i8_v8i64(ptr %x) {
-; LMULMAX1-LABEL: sextload_v8i8_v8i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vle8.v v9, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf8 v8, v9
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v11, v9, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf8 v10, v11
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v12, v11, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf8 v11, v12
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v12, v9, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf8 v9, v12
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: sextload_v8i8_v8i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; LMULMAX4-NEXT:    vle8.v v12, (a0)
-; LMULMAX4-NEXT:    vsext.vf8 v8, v12
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: sextload_v8i8_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vsext.vf8 v8, v12
+; CHECK-NEXT:    ret
   %y = load <8 x i8>, ptr %x
   %z = sext <8 x i8> %y to <8 x i64>
   ret <8 x i64> %z
 }
 
 define <8 x i64> @zextload_v8i8_v8i64(ptr %x) {
-; LMULMAX1-LABEL: zextload_v8i8_v8i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vle8.v v9, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf8 v8, v9
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v11, v9, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf8 v10, v11
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v12, v11, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf8 v11, v12
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v12, v9, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf8 v9, v12
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: zextload_v8i8_v8i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; LMULMAX4-NEXT:    vle8.v v12, (a0)
-; LMULMAX4-NEXT:    vzext.vf8 v8, v12
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: zextload_v8i8_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vzext.vf8 v8, v12
+; CHECK-NEXT:    ret
   %y = load <8 x i8>, ptr %x
   %z = zext <8 x i8> %y to <8 x i64>
   ret <8 x i64> %z
 }
 
 define <16 x i16> @sextload_v16i8_v16i16(ptr %x) {
-; LMULMAX1-LABEL: sextload_v16i8_v16i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vle8.v v10, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v10, 8
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf2 v9, v8
-; LMULMAX1-NEXT:    vsext.vf2 v8, v10
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: sextload_v16i8_v16i16:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX4-NEXT:    vle8.v v10, (a0)
-; LMULMAX4-NEXT:    vsext.vf2 v8, v10
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: sextload_v16i8_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    ret
   %y = load <16 x i8>, ptr %x
   %z = sext <16 x i8> %y to <16 x i16>
   ret <16 x i16> %z
 }
 
 define <16 x i16> @zextload_v16i8_v16i16(ptr %x) {
-; LMULMAX1-LABEL: zextload_v16i8_v16i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vle8.v v10, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v10, 8
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf2 v9, v8
-; LMULMAX1-NEXT:    vzext.vf2 v8, v10
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: zextload_v16i8_v16i16:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX4-NEXT:    vle8.v v10, (a0)
-; LMULMAX4-NEXT:    vzext.vf2 v8, v10
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: zextload_v16i8_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    ret
   %y = load <16 x i8>, ptr %x
   %z = zext <16 x i8> %y to <16 x i16>
   ret <16 x i16> %z
 }
 
 define <16 x i32> @sextload_v16i8_v16i32(ptr %x) {
-; LMULMAX1-LABEL: sextload_v16i8_v16i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vle8.v v9, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v8, v9
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v11, v9, 8
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v10, v11
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v12, v11, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v11, v12
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v12, v9, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v9, v12
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: sextload_v16i8_v16i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vle8.v v12, (a0)
-; LMULMAX4-NEXT:    vsext.vf4 v8, v12
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: sextload_v16i8_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vsext.vf4 v8, v12
+; CHECK-NEXT:    ret
   %y = load <16 x i8>, ptr %x
   %z = sext <16 x i8> %y to <16 x i32>
   ret <16 x i32> %z
 }
 
 define <16 x i32> @zextload_v16i8_v16i32(ptr %x) {
-; LMULMAX1-LABEL: zextload_v16i8_v16i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vle8.v v9, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf4 v8, v9
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v11, v9, 8
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf4 v10, v11
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v12, v11, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf4 v11, v12
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v12, v9, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf4 v9, v12
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: zextload_v16i8_v16i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vle8.v v12, (a0)
-; LMULMAX4-NEXT:    vzext.vf4 v8, v12
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: zextload_v16i8_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vzext.vf4 v8, v12
+; CHECK-NEXT:    ret
   %y = load <16 x i8>, ptr %x
   %z = zext <16 x i8> %y to <16 x i32>
   ret <16 x i32> %z
 }
 
 define <16 x i64> @sextload_v16i8_v16i64(ptr %x) {
-; LMULMAX1-LABEL: sextload_v16i8_v16i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vle8.v v10, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf8 v8, v10
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v11, v10, 8
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf8 v12, v11
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v13, v10, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf8 v9, v13
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v14, v11, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf8 v13, v14
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v11, v11, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf8 v14, v11
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v11, v11, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf8 v15, v11
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v11, v10, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf8 v10, v11
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v16, v11, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf8 v11, v16
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: sextload_v16i8_v16i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX4-NEXT:    vle8.v v16, (a0)
-; LMULMAX4-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; LMULMAX4-NEXT:    vslidedown.vi v8, v16, 8
-; LMULMAX4-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; LMULMAX4-NEXT:    vsext.vf8 v12, v8
-; LMULMAX4-NEXT:    vsext.vf8 v8, v16
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: sextload_v16i8_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vle8.v v16, (a0)
+; CHECK-NEXT:    vsext.vf8 v8, v16
+; CHECK-NEXT:    ret
   %y = load <16 x i8>, ptr %x
   %z = sext <16 x i8> %y to <16 x i64>
   ret <16 x i64> %z
 }
 
 define <16 x i64> @zextload_v16i8_v16i64(ptr %x) {
-; LMULMAX1-LABEL: zextload_v16i8_v16i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vle8.v v10, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf8 v8, v10
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v11, v10, 8
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf8 v12, v11
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v13, v10, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf8 v9, v13
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v14, v11, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf8 v13, v14
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v11, v11, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf8 v14, v11
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v11, v11, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf8 v15, v11
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v11, v10, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf8 v10, v11
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v16, v11, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf8 v11, v16
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: zextload_v16i8_v16i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX4-NEXT:    vle8.v v16, (a0)
-; LMULMAX4-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; LMULMAX4-NEXT:    vslidedown.vi v8, v16, 8
-; LMULMAX4-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; LMULMAX4-NEXT:    vzext.vf8 v12, v8
-; LMULMAX4-NEXT:    vzext.vf8 v8, v16
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: zextload_v16i8_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vle8.v v16, (a0)
+; CHECK-NEXT:    vzext.vf8 v8, v16
+; CHECK-NEXT:    ret
   %y = load <16 x i8>, ptr %x
   %z = zext <16 x i8> %y to <16 x i64>
   ret <16 x i64> %z
@@ -649,46 +421,24 @@ define <4 x i32> @zextload_v4i16_v4i32(ptr %x) {
 }
 
 define <4 x i64> @sextload_v4i16_v4i64(ptr %x) {
-; LMULMAX1-LABEL: sextload_v4i16_v4i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vle16.v v10, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v10, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v9, v8
-; LMULMAX1-NEXT:    vsext.vf4 v8, v10
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: sextload_v4i16_v4i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX4-NEXT:    vle16.v v10, (a0)
-; LMULMAX4-NEXT:    vsext.vf4 v8, v10
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: sextload_v4i16_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vsext.vf4 v8, v10
+; CHECK-NEXT:    ret
   %y = load <4 x i16>, ptr %x
   %z = sext <4 x i16> %y to <4 x i64>
   ret <4 x i64> %z
 }
 
 define <4 x i64> @zextload_v4i16_v4i64(ptr %x) {
-; LMULMAX1-LABEL: zextload_v4i16_v4i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vle16.v v10, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v10, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf4 v9, v8
-; LMULMAX1-NEXT:    vzext.vf4 v8, v10
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: zextload_v4i16_v4i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX4-NEXT:    vle16.v v10, (a0)
-; LMULMAX4-NEXT:    vzext.vf4 v8, v10
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: zextload_v4i16_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vzext.vf4 v8, v10
+; CHECK-NEXT:    ret
   %y = load <4 x i16>, ptr %x
   %z = zext <4 x i16> %y to <4 x i64>
   ret <4 x i64> %z
@@ -707,294 +457,108 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, ptr %z) {
 }
 
 define <8 x i32> @sextload_v8i16_v8i32(ptr %x) {
-; LMULMAX1-LABEL: sextload_v8i16_v8i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vle16.v v10, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v10, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf2 v9, v8
-; LMULMAX1-NEXT:    vsext.vf2 v8, v10
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: sextload_v8i16_v8i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX4-NEXT:    vle16.v v10, (a0)
-; LMULMAX4-NEXT:    vsext.vf2 v8, v10
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: sextload_v8i16_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    ret
   %y = load <8 x i16>, ptr %x
   %z = sext <8 x i16> %y to <8 x i32>
   ret <8 x i32> %z
 }
 
 define <8 x i32> @zextload_v8i16_v8i32(ptr %x) {
-; LMULMAX1-LABEL: zextload_v8i16_v8i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vle16.v v10, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v10, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf2 v9, v8
-; LMULMAX1-NEXT:    vzext.vf2 v8, v10
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: zextload_v8i16_v8i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX4-NEXT:    vle16.v v10, (a0)
-; LMULMAX4-NEXT:    vzext.vf2 v8, v10
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: zextload_v8i16_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    ret
   %y = load <8 x i16>, ptr %x
   %z = zext <8 x i16> %y to <8 x i32>
   ret <8 x i32> %z
 }
 
 define <8 x i64> @sextload_v8i16_v8i64(ptr %x) {
-; LMULMAX1-LABEL: sextload_v8i16_v8i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v8, v9
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v11, v9, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v10, v11
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v12, v11, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v11, v12
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v12, v9, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v9, v12
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: sextload_v8i16_v8i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; LMULMAX4-NEXT:    vle16.v v12, (a0)
-; LMULMAX4-NEXT:    vsext.vf4 v8, v12
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: sextload_v8i16_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vsext.vf4 v8, v12
+; CHECK-NEXT:    ret
   %y = load <8 x i16>, ptr %x
   %z = sext <8 x i16> %y to <8 x i64>
   ret <8 x i64> %z
 }
 
 define <8 x i64> @zextload_v8i16_v8i64(ptr %x) {
-; LMULMAX1-LABEL: zextload_v8i16_v8i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf4 v8, v9
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v11, v9, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf4 v10, v11
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v12, v11, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf4 v11, v12
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v12, v9, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf4 v9, v12
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: zextload_v8i16_v8i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; LMULMAX4-NEXT:    vle16.v v12, (a0)
-; LMULMAX4-NEXT:    vzext.vf4 v8, v12
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: zextload_v8i16_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vzext.vf4 v8, v12
+; CHECK-NEXT:    ret
   %y = load <8 x i16>, ptr %x
   %z = zext <8 x i16> %y to <8 x i64>
   ret <8 x i64> %z
 }
 
 define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %z) {
-; LMULMAX1-LABEL: truncstore_v16i16_v16i8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 8
-; LMULMAX1-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: truncstore_v16i16_v16i8:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v10, v8, 0
-; LMULMAX4-NEXT:    vse8.v v10, (a0)
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: truncstore_v16i16_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vse8.v v10, (a0)
+; CHECK-NEXT:    ret
   %y = trunc <16 x i16> %x to <16 x i8>
   store <16 x i8> %y, ptr %z
   ret void
 }
 
 define <16 x i32> @sextload_v16i16_v16i32(ptr %x) {
-; LMULMAX1-LABEL: sextload_v16i16_v16i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vle16.v v10, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle16.v v12, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v10, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf2 v9, v8
-; LMULMAX1-NEXT:    vsext.vf2 v8, v10
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v10, v12, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf2 v11, v10
-; LMULMAX1-NEXT:    vsext.vf2 v10, v12
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: sextload_v16i16_v16i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vle16.v v12, (a0)
-; LMULMAX4-NEXT:    vsext.vf2 v8, v12
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: sextload_v16i16_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    ret
   %y = load <16 x i16>, ptr %x
   %z = sext <16 x i16> %y to <16 x i32>
   ret <16 x i32> %z
 }
 
 define <16 x i32> @zextload_v16i16_v16i32(ptr %x) {
-; LMULMAX1-LABEL: zextload_v16i16_v16i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vle16.v v10, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle16.v v12, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v10, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf2 v9, v8
-; LMULMAX1-NEXT:    vzext.vf2 v8, v10
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v10, v12, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf2 v11, v10
-; LMULMAX1-NEXT:    vzext.vf2 v10, v12
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: zextload_v16i16_v16i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vle16.v v12, (a0)
-; LMULMAX4-NEXT:    vzext.vf2 v8, v12
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: zextload_v16i16_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    ret
   %y = load <16 x i16>, ptr %x
   %z = zext <16 x i16> %y to <16 x i32>
   ret <16 x i32> %z
 }
 
 define <16 x i64> @sextload_v16i16_v16i64(ptr %x) {
-; LMULMAX1-LABEL: sextload_v16i16_v16i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle16.v v13, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v8, v9
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v11, v9, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v10, v11
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v12, v11, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v11, v12
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v12, v9, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v9, v12
-; LMULMAX1-NEXT:    vsext.vf4 v12, v13
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v15, v13, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v14, v15
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v16, v15, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v15, v16
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v16, v13, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v13, v16
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: sextload_v16i16_v16i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX4-NEXT:    vle16.v v16, (a0)
-; LMULMAX4-NEXT:    vsetivli zero, 8, e16, m2, ta, ma
-; LMULMAX4-NEXT:    vslidedown.vi v8, v16, 8
-; LMULMAX4-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; LMULMAX4-NEXT:    vsext.vf4 v12, v8
-; LMULMAX4-NEXT:    vsext.vf4 v8, v16
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: sextload_v16i16_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vle16.v v16, (a0)
+; CHECK-NEXT:    vsext.vf4 v8, v16
+; CHECK-NEXT:    ret
   %y = load <16 x i16>, ptr %x
   %z = sext <16 x i16> %y to <16 x i64>
   ret <16 x i64> %z
 }
 
 define <16 x i64> @zextload_v16i16_v16i64(ptr %x) {
-; LMULMAX1-LABEL: zextload_v16i16_v16i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle16.v v13, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf4 v8, v9
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v11, v9, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf4 v10, v11
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v12, v11, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf4 v11, v12
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v12, v9, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf4 v9, v12
-; LMULMAX1-NEXT:    vzext.vf4 v12, v13
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v15, v13, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf4 v14, v15
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v16, v15, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf4 v15, v16
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v16, v13, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf4 v13, v16
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: zextload_v16i16_v16i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX4-NEXT:    vle16.v v16, (a0)
-; LMULMAX4-NEXT:    vsetivli zero, 8, e16, m2, ta, ma
-; LMULMAX4-NEXT:    vslidedown.vi v8, v16, 8
-; LMULMAX4-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; LMULMAX4-NEXT:    vzext.vf4 v12, v8
-; LMULMAX4-NEXT:    vzext.vf4 v8, v16
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: zextload_v16i16_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vle16.v v16, (a0)
+; CHECK-NEXT:    vzext.vf4 v8, v16
+; CHECK-NEXT:    ret
   %y = load <16 x i16>, ptr %x
   %z = zext <16 x i16> %y to <16 x i64>
   ret <16 x i64> %z
@@ -1077,323 +641,124 @@ define void @truncstore_v4i32_v4i16(<4 x i32> %x, ptr %z) {
 }
 
 define <4 x i64> @sextload_v4i32_v4i64(ptr %x) {
-; LMULMAX1-LABEL: sextload_v4i32_v4i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v10, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v10, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf2 v9, v8
-; LMULMAX1-NEXT:    vsext.vf2 v8, v10
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: sextload_v4i32_v4i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX4-NEXT:    vle32.v v10, (a0)
-; LMULMAX4-NEXT:    vsext.vf2 v8, v10
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: sextload_v4i32_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    ret
   %y = load <4 x i32>, ptr %x
   %z = sext <4 x i32> %y to <4 x i64>
   ret <4 x i64> %z
 }
 
 define <4 x i64> @zextload_v4i32_v4i64(ptr %x) {
-; LMULMAX1-LABEL: zextload_v4i32_v4i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v10, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v10, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf2 v9, v8
-; LMULMAX1-NEXT:    vzext.vf2 v8, v10
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: zextload_v4i32_v4i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX4-NEXT:    vle32.v v10, (a0)
-; LMULMAX4-NEXT:    vzext.vf2 v8, v10
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: zextload_v4i32_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vzext.vf2 v8, v10
+; CHECK-NEXT:    ret
   %y = load <4 x i32>, ptr %x
   %z = zext <4 x i32> %y to <4 x i64>
   ret <4 x i64> %z
 }
 
 define void @truncstore_v8i32_v8i8(<8 x i32> %x, ptr %z) {
-; LMULMAX1-LABEL: truncstore_v8i32_v8i8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
-; LMULMAX1-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: truncstore_v8i32_v8i8:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v10, v8, 0
-; LMULMAX4-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v8, v10, 0
-; LMULMAX4-NEXT:    vse8.v v8, (a0)
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: truncstore_v8i32_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %y = trunc <8 x i32> %x to <8 x i8>
   store <8 x i8> %y, ptr %z
   ret void
 }
 
 define void @truncstore_v8i32_v8i16(<8 x i32> %x, ptr %z) {
-; LMULMAX1-LABEL: truncstore_v8i32_v8i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: truncstore_v8i32_v8i16:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v10, v8, 0
-; LMULMAX4-NEXT:    vse16.v v10, (a0)
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: truncstore_v8i32_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    ret
   %y = trunc <8 x i32> %x to <8 x i16>
   store <8 x i16> %y, ptr %z
   ret void
 }
 
 define <8 x i64> @sextload_v8i32_v8i64(ptr %x) {
-; LMULMAX1-LABEL: sextload_v8i32_v8i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v10, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle32.v v12, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v10, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf2 v9, v8
-; LMULMAX1-NEXT:    vsext.vf2 v8, v10
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v10, v12, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf2 v11, v10
-; LMULMAX1-NEXT:    vsext.vf2 v10, v12
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: sextload_v8i32_v8i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; LMULMAX4-NEXT:    vle32.v v12, (a0)
-; LMULMAX4-NEXT:    vsext.vf2 v8, v12
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: sextload_v8i32_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    ret
   %y = load <8 x i32>, ptr %x
   %z = sext <8 x i32> %y to <8 x i64>
   ret <8 x i64> %z
 }
 
 define <8 x i64> @zextload_v8i32_v8i64(ptr %x) {
-; LMULMAX1-LABEL: zextload_v8i32_v8i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v10, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle32.v v12, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v10, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf2 v9, v8
-; LMULMAX1-NEXT:    vzext.vf2 v8, v10
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v10, v12, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf2 v11, v10
-; LMULMAX1-NEXT:    vzext.vf2 v10, v12
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: zextload_v8i32_v8i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; LMULMAX4-NEXT:    vle32.v v12, (a0)
-; LMULMAX4-NEXT:    vzext.vf2 v8, v12
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: zextload_v8i32_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vzext.vf2 v8, v12
+; CHECK-NEXT:    ret
   %y = load <8 x i32>, ptr %x
   %z = zext <8 x i32> %y to <8 x i64>
   ret <8 x i64> %z
 }
 
 define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %z) {
-; LMULMAX1-LABEL: truncstore_v16i32_v16i8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v10, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetivli zero, 12, e8, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 8
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v11, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 12
-; LMULMAX1-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: truncstore_v16i32_v16i8:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v12, v8, 0
-; LMULMAX4-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v8, v12, 0
-; LMULMAX4-NEXT:    vse8.v v8, (a0)
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: truncstore_v16i32_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %y = trunc <16 x i32> %x to <16 x i8>
   store <16 x i8> %y, ptr %z
   ret void
 }
 
 define void @truncstore_v16i32_v16i16(<16 x i32> %x, ptr %z) {
-; LMULMAX1-LABEL: truncstore_v16i32_v16i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v11, 0
-; LMULMAX1-NEXT:    vnsrl.wi v10, v10, 0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v10, v9, 4
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vse16.v v10, (a1)
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: truncstore_v16i32_v16i16:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v12, v8, 0
-; LMULMAX4-NEXT:    vse16.v v12, (a0)
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: truncstore_v16i32_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vse16.v v12, (a0)
+; CHECK-NEXT:    ret
   %y = trunc <16 x i32> %x to <16 x i16>
   store <16 x i16> %y, ptr %z
   ret void
 }
 
 define <16 x i64> @sextload_v16i32_v16i64(ptr %x) {
-; LMULMAX1-LABEL: sextload_v16i32_v16i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a1, a0, 48
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v16, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 32
-; LMULMAX1-NEXT:    vle32.v v14, (a1)
-; LMULMAX1-NEXT:    vle32.v v10, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle32.v v12, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v10, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf2 v9, v8
-; LMULMAX1-NEXT:    vsext.vf2 v8, v10
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v10, v12, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf2 v11, v10
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v10, v14, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf2 v13, v10
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v10, v16, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf2 v15, v10
-; LMULMAX1-NEXT:    vsext.vf2 v10, v12
-; LMULMAX1-NEXT:    vsext.vf2 v12, v14
-; LMULMAX1-NEXT:    vsext.vf2 v14, v16
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: sextload_v16i32_v16i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vle32.v v16, (a0)
-; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vslidedown.vi v8, v16, 8
-; LMULMAX4-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; LMULMAX4-NEXT:    vsext.vf2 v12, v8
-; LMULMAX4-NEXT:    vsext.vf2 v8, v16
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: sextload_v16i32_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vle32.v v16, (a0)
+; CHECK-NEXT:    vsext.vf2 v8, v16
+; CHECK-NEXT:    ret
   %y = load <16 x i32>, ptr %x
   %z = sext <16 x i32> %y to <16 x i64>
   ret <16 x i64> %z
 }
 
 define <16 x i64> @zextload_v16i32_v16i64(ptr %x) {
-; LMULMAX1-LABEL: zextload_v16i32_v16i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a1, a0, 48
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v16, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 32
-; LMULMAX1-NEXT:    vle32.v v14, (a1)
-; LMULMAX1-NEXT:    vle32.v v10, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle32.v v12, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v10, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf2 v9, v8
-; LMULMAX1-NEXT:    vzext.vf2 v8, v10
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v10, v12, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf2 v11, v10
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v10, v14, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf2 v13, v10
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v10, v16, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vzext.vf2 v15, v10
-; LMULMAX1-NEXT:    vzext.vf2 v10, v12
-; LMULMAX1-NEXT:    vzext.vf2 v12, v14
-; LMULMAX1-NEXT:    vzext.vf2 v14, v16
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: zextload_v16i32_v16i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vle32.v v16, (a0)
-; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vslidedown.vi v8, v16, 8
-; LMULMAX4-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; LMULMAX4-NEXT:    vzext.vf2 v12, v8
-; LMULMAX4-NEXT:    vzext.vf2 v8, v16
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: zextload_v16i32_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vle32.v v16, (a0)
+; CHECK-NEXT:    vzext.vf2 v8, v16
+; CHECK-NEXT:    ret
   %y = load <16 x i32>, ptr %x
   %z = zext <16 x i32> %y to <16 x i64>
   ret <16 x i64> %z
@@ -1442,415 +807,126 @@ define void @truncstore_v2i64_v2i32(<2 x i64> %x, ptr %z) {
 }
 
 define void @truncstore_v4i64_v4i8(<4 x i64> %x, ptr %z) {
-; LMULMAX1-LABEL: truncstore_v4i64_v4i8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
-; LMULMAX1-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: truncstore_v4i64_v4i8:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v10, v8, 0
-; LMULMAX4-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v8, v10, 0
-; LMULMAX4-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX4-NEXT:    vse8.v v8, (a0)
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: truncstore_v4i64_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %y = trunc <4 x i64> %x to <4 x i8>
   store <4 x i8> %y, ptr %z
   ret void
 }
 
 define void @truncstore_v4i64_v4i16(<4 x i64> %x, ptr %z) {
-; LMULMAX1-LABEL: truncstore_v4i64_v4i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: truncstore_v4i64_v4i16:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v10, v8, 0
-; LMULMAX4-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v8, v10, 0
-; LMULMAX4-NEXT:    vse16.v v8, (a0)
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: truncstore_v4i64_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %y = trunc <4 x i64> %x to <4 x i16>
   store <4 x i16> %y, ptr %z
   ret void
 }
 
 define void @truncstore_v4i64_v4i32(<4 x i64> %x, ptr %z) {
-; LMULMAX1-LABEL: truncstore_v4i64_v4i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: truncstore_v4i64_v4i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v10, v8, 0
-; LMULMAX4-NEXT:    vse32.v v10, (a0)
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: truncstore_v4i64_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vse32.v v10, (a0)
+; CHECK-NEXT:    ret
   %y = trunc <4 x i64> %x to <4 x i32>
   store <4 x i32> %y, ptr %z
   ret void
 }
 
 define void @truncstore_v8i64_v8i8(<8 x i64> %x, ptr %z) {
-; LMULMAX1-LABEL: truncstore_v8i64_v8i8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v10, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v11, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 6
-; LMULMAX1-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: truncstore_v8i64_v8i8:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v12, v8, 0
-; LMULMAX4-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v8, v12, 0
-; LMULMAX4-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX4-NEXT:    vse8.v v8, (a0)
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: truncstore_v8i64_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %y = trunc <8 x i64> %x to <8 x i8>
   store <8 x i8> %y, ptr %z
   ret void
 }
 
 define void @truncstore_v8i64_v8i16(<8 x i64> %x, ptr %z) {
-; LMULMAX1-LABEL: truncstore_v8i64_v8i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v10, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v11, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 6
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: truncstore_v8i64_v8i16:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v12, v8, 0
-; LMULMAX4-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v8, v12, 0
-; LMULMAX4-NEXT:    vse16.v v8, (a0)
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: truncstore_v8i64_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %y = trunc <8 x i64> %x to <8 x i16>
   store <8 x i16> %y, ptr %z
   ret void
 }
 
 define void @truncstore_v8i64_v8i32(<8 x i64> %x, ptr %z) {
-; LMULMAX1-LABEL: truncstore_v8i64_v8i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v11, 0
-; LMULMAX1-NEXT:    vnsrl.wi v10, v10, 0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v10, v9, 2
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vse32.v v10, (a1)
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: truncstore_v8i64_v8i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v12, v8, 0
-; LMULMAX4-NEXT:    vse32.v v12, (a0)
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: truncstore_v8i64_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vse32.v v12, (a0)
+; CHECK-NEXT:    ret
   %y = trunc <8 x i64> %x to <8 x i32>
   store <8 x i32> %y, ptr %z
   ret void
 }
 
 define void @truncstore_v16i64_v16i8(<16 x i64> %x, ptr %z) {
-; LMULMAX1-LABEL: truncstore_v16i64_v16i8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v10, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetivli zero, 6, e8, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v11, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 6
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v12, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetivli zero, 10, e8, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 8
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v13, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetivli zero, 12, e8, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 10
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v14, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetivli zero, 14, e8, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 12
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v15, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 14
-; LMULMAX1-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: truncstore_v16i64_v16i8:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v16, v12, 0
-; LMULMAX4-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v12, v16, 0
-; LMULMAX4-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v12, v12, 0
-; LMULMAX4-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v14, v8, 0
-; LMULMAX4-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v8, v14, 0
-; LMULMAX4-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX4-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX4-NEXT:    vslideup.vi v8, v12, 8
-; LMULMAX4-NEXT:    vse8.v v8, (a0)
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: truncstore_v16i64_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vse8.v v10, (a0)
+; CHECK-NEXT:    ret
   %y = trunc <16 x i64> %x to <16 x i8>
   store <16 x i8> %y, ptr %z
   ret void
 }
 
 define void @truncstore_v16i64_v16i16(<16 x i64> %x, ptr %z) {
-; LMULMAX1-LABEL: truncstore_v16i64_v16i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v10, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v11, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 6
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v13, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v10, v12, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v10, v10, 0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v10, v9, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v14, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v10, v9, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v15, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v10, v9, 6
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vse16.v v10, (a1)
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: truncstore_v16i64_v16i16:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v16, v12, 0
-; LMULMAX4-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v12, v16, 0
-; LMULMAX4-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v14, v8, 0
-; LMULMAX4-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v8, v14, 0
-; LMULMAX4-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX4-NEXT:    vslideup.vi v8, v12, 8
-; LMULMAX4-NEXT:    vse16.v v8, (a0)
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: truncstore_v16i64_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %y = trunc <16 x i64> %x to <16 x i16>
   store <16 x i16> %y, ptr %z
   ret void
 }
 
 define void @truncstore_v16i64_v16i32(<16 x i64> %x, ptr %z) {
-; LMULMAX1-LABEL: truncstore_v16i64_v16i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v11, 0
-; LMULMAX1-NEXT:    vnsrl.wi v10, v10, 0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v10, v9, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v13, 0
-; LMULMAX1-NEXT:    vnsrl.wi v11, v12, 0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v11, v9, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v15, 0
-; LMULMAX1-NEXT:    vnsrl.wi v12, v14, 0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v12, v9, 2
-; LMULMAX1-NEXT:    addi a1, a0, 48
-; LMULMAX1-NEXT:    vse32.v v12, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 32
-; LMULMAX1-NEXT:    vse32.v v11, (a1)
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vse32.v v10, (a1)
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX4-LABEL: truncstore_v16i64_v16i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX4-NEXT:    vnsrl.wi v16, v12, 0
-; LMULMAX4-NEXT:    vnsrl.wi v12, v8, 0
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vslideup.vi v12, v16, 8
-; LMULMAX4-NEXT:    vse32.v v12, (a0)
-; LMULMAX4-NEXT:    ret
+; CHECK-LABEL: truncstore_v16i64_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vse32.v v16, (a0)
+; CHECK-NEXT:    ret
   %y = trunc <16 x i64> %x to <16 x i32>
   store <16 x i32> %y, ptr %z
   ret void
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
index 88a86bbdab9cd6..d0dc70fd81151d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
 
 define void @fpext_v2f16_v2f32(ptr %x, ptr %y) {
 ; CHECK-LABEL: fpext_v2f16_v2f32:
@@ -35,28 +33,13 @@ define void @fpext_v2f16_v2f64(ptr %x, ptr %y) {
 }
 
 define void @fpext_v8f16_v8f32(ptr %x, ptr %y) {
-; LMULMAX8-LABEL: fpext_v8f16_v8f32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX8-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-NEXT:    vfwcvt.f.f.v v10, v8
-; LMULMAX8-NEXT:    vse32.v v10, (a1)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: fpext_v8f16_v8f32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vfwcvt.f.f.v v9, v8
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v8, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vfwcvt.f.f.v v10, v8
-; LMULMAX1-NEXT:    addi a0, a1, 16
-; LMULMAX1-NEXT:    vse32.v v10, (a0)
-; LMULMAX1-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: fpext_v8f16_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vse32.v v10, (a1)
+; CHECK-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %d = fpext <8 x half> %a to <8 x float>
   store <8 x float> %d, ptr %y
@@ -64,50 +47,15 @@ define void @fpext_v8f16_v8f32(ptr %x, ptr %y) {
 }
 
 define void @fpext_v8f16_v8f64(ptr %x, ptr %y) {
-; LMULMAX8-LABEL: fpext_v8f16_v8f64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX8-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-NEXT:    vfwcvt.f.f.v v10, v8
-; LMULMAX8-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vfwcvt.f.f.v v12, v10
-; LMULMAX8-NEXT:    vse64.v v12, (a1)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: fpext_v8f16_v8f64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vfwcvt.f.f.v v10, v9
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfwcvt.f.f.v v9, v10
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vfwcvt.f.f.v v10, v8
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfwcvt.f.f.v v11, v10
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v8, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vfwcvt.f.f.v v10, v8
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfwcvt.f.f.v v12, v10
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vfwcvt.f.f.v v10, v8
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfwcvt.f.f.v v8, v10
-; LMULMAX1-NEXT:    addi a0, a1, 48
-; LMULMAX1-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, a1, 32
-; LMULMAX1-NEXT:    vse64.v v12, (a0)
-; LMULMAX1-NEXT:    vse64.v v11, (a1)
-; LMULMAX1-NEXT:    addi a1, a1, 16
-; LMULMAX1-NEXT:    vse64.v v9, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: fpext_v8f16_v8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v10
+; CHECK-NEXT:    vse64.v v12, (a1)
+; CHECK-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %d = fpext <8 x half> %a to <8 x double>
   store <8 x double> %d, ptr %y
@@ -145,26 +93,13 @@ define void @fpround_v2f64_v2f16(ptr %x, ptr %y) {
 }
 
 define void @fpround_v8f32_v8f16(ptr %x, ptr %y) {
-; LMULMAX8-LABEL: fpround_v8f32_v8f16:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    vfncvt.f.f.w v10, v8
-; LMULMAX8-NEXT:    vse16.v v10, (a1)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: fpround_v8f32_v8f16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a2, a0, 16
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-NEXT:    vfncvt.f.f.w v10, v8
-; LMULMAX1-NEXT:    vfncvt.f.f.w v8, v9
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v10, v8, 4
-; LMULMAX1-NEXT:    vse16.v v10, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: fpround_v8f32_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfncvt.f.f.w v10, v8
+; CHECK-NEXT:    vse16.v v10, (a1)
+; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %d = fptrunc <8 x float> %a to <8 x half>
   store <8 x half> %d, ptr %y
@@ -172,49 +107,15 @@ define void @fpround_v8f32_v8f16(ptr %x, ptr %y) {
 }
 
 define void @fpround_v8f64_v8f16(ptr %x, ptr %y) {
-; LMULMAX8-LABEL: fpround_v8f64_v8f16:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vle64.v v8, (a0)
-; LMULMAX8-NEXT:    vfncvt.rod.f.f.w v12, v8
-; LMULMAX8-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; LMULMAX8-NEXT:    vfncvt.f.f.w v8, v12
-; LMULMAX8-NEXT:    vse16.v v8, (a1)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: fpround_v8f64_v8f16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a2, a0, 48
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vle64.v v8, (a2)
-; LMULMAX1-NEXT:    addi a2, a0, 32
-; LMULMAX1-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle64.v v11, (a0)
-; LMULMAX1-NEXT:    vfncvt.rod.f.f.w v12, v9
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vfncvt.f.f.w v9, v12
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rod.f.f.w v12, v11
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vfncvt.f.f.w v11, v12
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v9, v11, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rod.f.f.w v11, v10
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vfncvt.f.f.w v10, v11
-; LMULMAX1-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v9, v10, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rod.f.f.w v10, v8
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vfncvt.f.f.w v8, v10
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v9, v8, 6
-; LMULMAX1-NEXT:    vse16.v v9, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: fpround_v8f64_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vfncvt.rod.f.f.w v12, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v8, v12
+; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    ret
   %a = load <8 x double>, ptr %x
   %d = fptrunc <8 x double> %a to <8 x half>
   store <8 x half> %d, ptr %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll
index 1ccb089bbff424..dc907eed16cce6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,LMULMAX2,RV32-LMULMAX2
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,LMULMAX2,RV64-LMULMAX2
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,LMULMAX1,RV32-LMULMAX1
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,LMULMAX1,RV64-LMULMAX1
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
 
 define void @splat_v8f16(ptr %x, half %y) {
 ; CHECK-LABEL: splat_v8f16:
@@ -44,21 +42,12 @@ define void @splat_v2f64(ptr %x, double %y) {
 }
 
 define void @splat_16f16(ptr %x, half %y) {
-; LMULMAX2-LABEL: splat_16f16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vfmv.v.f v8, fa0
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: splat_16f16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vfmv.v.f v8, fa0
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: splat_16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa0
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <16 x half> poison, half %y, i32 0
   %b = shufflevector <16 x half> %a, <16 x half> poison, <16 x i32> zeroinitializer
   store <16 x half> %b, ptr %x
@@ -66,21 +55,12 @@ define void @splat_16f16(ptr %x, half %y) {
 }
 
 define void @splat_v8f32(ptr %x, float %y) {
-; LMULMAX2-LABEL: splat_v8f32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vfmv.v.f v8, fa0
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: splat_v8f32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vfmv.v.f v8, fa0
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: splat_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa0
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <8 x float> poison, float %y, i32 0
   %b = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> zeroinitializer
   store <8 x float> %b, ptr %x
@@ -88,21 +68,12 @@ define void @splat_v8f32(ptr %x, float %y) {
 }
 
 define void @splat_v4f64(ptr %x, double %y) {
-; LMULMAX2-LABEL: splat_v4f64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vfmv.v.f v8, fa0
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: splat_v4f64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vfmv.v.f v8, fa0
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vse64.v v8, (a1)
-; LMULMAX1-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: splat_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa0
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <4 x double> poison, double %y, i32 0
   %b = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> zeroinitializer
   store <4 x double> %b, ptr %x
@@ -149,21 +120,12 @@ define void @splat_zero_v2f64(ptr %x) {
 }
 
 define void @splat_zero_16f16(ptr %x) {
-; LMULMAX2-LABEL: splat_zero_16f16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.i v8, 0
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: splat_zero_16f16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: splat_zero_16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <16 x half> poison, half 0.0, i32 0
   %b = shufflevector <16 x half> %a, <16 x half> poison, <16 x i32> zeroinitializer
   store <16 x half> %b, ptr %x
@@ -171,21 +133,12 @@ define void @splat_zero_16f16(ptr %x) {
 }
 
 define void @splat_zero_v8f32(ptr %x) {
-; LMULMAX2-LABEL: splat_zero_v8f32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.i v8, 0
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: splat_zero_v8f32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: splat_zero_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <8 x float> poison, float 0.0, i32 0
   %b = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> zeroinitializer
   store <8 x float> %b, ptr %x
@@ -193,21 +146,12 @@ define void @splat_zero_v8f32(ptr %x) {
 }
 
 define void @splat_zero_v4f64(ptr %x) {
-; LMULMAX2-LABEL: splat_zero_v4f64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.i v8, 0
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: splat_zero_v4f64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: splat_zero_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <4 x double> poison, double 0.0, i32 0
   %b = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> zeroinitializer
   store <4 x double> %b, ptr %x
@@ -267,23 +211,13 @@ define void @splat_negzero_v2f64(ptr %x) {
 }
 
 define void @splat_negzero_16f16(ptr %x) {
-; LMULMAX2-LABEL: splat_negzero_16f16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    lui a1, 1048568
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.x v8, a1
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: splat_negzero_16f16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    lui a1, 1048568
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vmv.v.x v8, a1
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: splat_negzero_16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 1048568
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vmv.v.x v8, a1
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <16 x half> poison, half -0.0, i32 0
   %b = shufflevector <16 x half> %a, <16 x half> poison, <16 x i32> zeroinitializer
   store <16 x half> %b, ptr %x
@@ -291,23 +225,13 @@ define void @splat_negzero_16f16(ptr %x) {
 }
 
 define void @splat_negzero_v8f32(ptr %x) {
-; LMULMAX2-LABEL: splat_negzero_v8f32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    lui a1, 524288
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.x v8, a1
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: splat_negzero_v8f32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    lui a1, 524288
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vmv.v.x v8, a1
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: splat_negzero_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, 524288
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmv.v.x v8, a1
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <8 x float> poison, float -0.0, i32 0
   %b = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> zeroinitializer
   store <8 x float> %b, ptr %x
@@ -315,45 +239,23 @@ define void @splat_negzero_v8f32(ptr %x) {
 }
 
 define void @splat_negzero_v4f64(ptr %x) {
-; RV32-LMULMAX2-LABEL: splat_negzero_v4f64:
-; RV32-LMULMAX2:       # %bb.0:
-; RV32-LMULMAX2-NEXT:    fcvt.d.w fa5, zero
-; RV32-LMULMAX2-NEXT:    fneg.d fa5, fa5
-; RV32-LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-LMULMAX2-NEXT:    vfmv.v.f v8, fa5
-; RV32-LMULMAX2-NEXT:    vse64.v v8, (a0)
-; RV32-LMULMAX2-NEXT:    ret
-;
-; RV64-LMULMAX2-LABEL: splat_negzero_v4f64:
-; RV64-LMULMAX2:       # %bb.0:
-; RV64-LMULMAX2-NEXT:    li a1, -1
-; RV64-LMULMAX2-NEXT:    slli a1, a1, 63
-; RV64-LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64-LMULMAX2-NEXT:    vmv.v.x v8, a1
-; RV64-LMULMAX2-NEXT:    vse64.v v8, (a0)
-; RV64-LMULMAX2-NEXT:    ret
-;
-; RV32-LMULMAX1-LABEL: splat_negzero_v4f64:
-; RV32-LMULMAX1:       # %bb.0:
-; RV32-LMULMAX1-NEXT:    fcvt.d.w fa5, zero
-; RV32-LMULMAX1-NEXT:    fneg.d fa5, fa5
-; RV32-LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-LMULMAX1-NEXT:    vfmv.v.f v8, fa5
-; RV32-LMULMAX1-NEXT:    addi a1, a0, 16
-; RV32-LMULMAX1-NEXT:    vse64.v v8, (a1)
-; RV32-LMULMAX1-NEXT:    vse64.v v8, (a0)
-; RV32-LMULMAX1-NEXT:    ret
+; CHECK-RV32-LABEL: splat_negzero_v4f64:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    fcvt.d.w fa5, zero
+; CHECK-RV32-NEXT:    fneg.d fa5, fa5
+; CHECK-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-RV32-NEXT:    vfmv.v.f v8, fa5
+; CHECK-RV32-NEXT:    vse64.v v8, (a0)
+; CHECK-RV32-NEXT:    ret
 ;
-; RV64-LMULMAX1-LABEL: splat_negzero_v4f64:
-; RV64-LMULMAX1:       # %bb.0:
-; RV64-LMULMAX1-NEXT:    li a1, -1
-; RV64-LMULMAX1-NEXT:    slli a1, a1, 63
-; RV64-LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64-LMULMAX1-NEXT:    vmv.v.x v8, a1
-; RV64-LMULMAX1-NEXT:    addi a1, a0, 16
-; RV64-LMULMAX1-NEXT:    vse64.v v8, (a1)
-; RV64-LMULMAX1-NEXT:    vse64.v v8, (a0)
-; RV64-LMULMAX1-NEXT:    ret
+; CHECK-RV64-LABEL: splat_negzero_v4f64:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    li a1, -1
+; CHECK-RV64-NEXT:    slli a1, a1, 63
+; CHECK-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-RV64-NEXT:    vmv.v.x v8, a1
+; CHECK-RV64-NEXT:    vse64.v v8, (a0)
+; CHECK-RV64-NEXT:    ret
   %a = insertelement <4 x double> poison, double -0.0, i32 0
   %b = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> zeroinitializer
   store <4 x double> %b, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll
index 36294af97469fb..de7dfab1dfcff1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
 
 define void @gather_const_v8f16(ptr %x) {
 ; CHECK-LABEL: gather_const_v8f16:
@@ -52,36 +50,14 @@ define void @gather_const_v2f64(ptr %x) {
 }
 
 define void @gather_const_v64f16(ptr %x) {
-; LMULMAX8-LABEL: gather_const_v64f16:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a1, 64
-; LMULMAX8-NEXT:    addi a2, a0, 94
-; LMULMAX8-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; LMULMAX8-NEXT:    vlse16.v v8, (a2), zero
-; LMULMAX8-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: gather_const_v64f16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a1, a0, 80
-; LMULMAX1-NEXT:    addi a2, a0, 16
-; LMULMAX1-NEXT:    addi a3, a0, 48
-; LMULMAX1-NEXT:    addi a4, a0, 32
-; LMULMAX1-NEXT:    addi a5, a0, 94
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vlse16.v v8, (a5), zero
-; LMULMAX1-NEXT:    addi a5, a0, 64
-; LMULMAX1-NEXT:    addi a6, a0, 112
-; LMULMAX1-NEXT:    addi a7, a0, 96
-; LMULMAX1-NEXT:    vse16.v v8, (a7)
-; LMULMAX1-NEXT:    vse16.v v8, (a6)
-; LMULMAX1-NEXT:    vse16.v v8, (a5)
-; LMULMAX1-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-NEXT:    vse16.v v8, (a4)
-; LMULMAX1-NEXT:    vse16.v v8, (a3)
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    vse16.v v8, (a2)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: gather_const_v64f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 64
+; CHECK-NEXT:    addi a2, a0, 94
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a2), zero
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <64 x half>, ptr %x
   %b = extractelement <64 x half> %a, i32 47
   %c = insertelement <64 x half> poison, half %b, i32 0
@@ -91,36 +67,14 @@ define void @gather_const_v64f16(ptr %x) {
 }
 
 define void @gather_const_v32f32(ptr %x) {
-; LMULMAX8-LABEL: gather_const_v32f32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a1, 32
-; LMULMAX8-NEXT:    addi a2, a0, 68
-; LMULMAX8-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; LMULMAX8-NEXT:    vlse32.v v8, (a2), zero
-; LMULMAX8-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: gather_const_v32f32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a1, a0, 64
-; LMULMAX1-NEXT:    addi a2, a0, 16
-; LMULMAX1-NEXT:    addi a3, a0, 48
-; LMULMAX1-NEXT:    addi a4, a0, 32
-; LMULMAX1-NEXT:    addi a5, a0, 68
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vlse32.v v8, (a5), zero
-; LMULMAX1-NEXT:    addi a5, a0, 80
-; LMULMAX1-NEXT:    addi a6, a0, 112
-; LMULMAX1-NEXT:    addi a7, a0, 96
-; LMULMAX1-NEXT:    vse32.v v8, (a7)
-; LMULMAX1-NEXT:    vse32.v v8, (a6)
-; LMULMAX1-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-NEXT:    vse32.v v8, (a5)
-; LMULMAX1-NEXT:    vse32.v v8, (a4)
-; LMULMAX1-NEXT:    vse32.v v8, (a3)
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    vse32.v v8, (a2)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: gather_const_v32f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    addi a2, a0, 68
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vlse32.v v8, (a2), zero
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x float>, ptr %x
   %b = extractelement <32 x float> %a, i32 17
   %c = insertelement <32 x float> poison, float %b, i32 0
@@ -130,34 +84,13 @@ define void @gather_const_v32f32(ptr %x) {
 }
 
 define void @gather_const_v16f64(ptr %x) {
-; LMULMAX8-LABEL: gather_const_v16f64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    addi a1, a0, 80
-; LMULMAX8-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; LMULMAX8-NEXT:    vlse64.v v8, (a1), zero
-; LMULMAX8-NEXT:    vse64.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: gather_const_v16f64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a1, a0, 80
-; LMULMAX1-NEXT:    addi a2, a0, 16
-; LMULMAX1-NEXT:    addi a3, a0, 48
-; LMULMAX1-NEXT:    addi a4, a0, 32
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vlse64.v v8, (a1), zero
-; LMULMAX1-NEXT:    addi a5, a0, 64
-; LMULMAX1-NEXT:    addi a6, a0, 112
-; LMULMAX1-NEXT:    addi a7, a0, 96
-; LMULMAX1-NEXT:    vse64.v v8, (a7)
-; LMULMAX1-NEXT:    vse64.v v8, (a6)
-; LMULMAX1-NEXT:    vse64.v v8, (a5)
-; LMULMAX1-NEXT:    vse64.v v8, (a1)
-; LMULMAX1-NEXT:    vse64.v v8, (a4)
-; LMULMAX1-NEXT:    vse64.v v8, (a3)
-; LMULMAX1-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-NEXT:    vse64.v v8, (a2)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: gather_const_v16f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, 80
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vlse64.v v8, (a1), zero
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x double>, ptr %x
   %b = extractelement <16 x double> %a, i32 10
   %c = insertelement <16 x double> poison, double %b, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index dd79311b78ba79..0f003d7af6100e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -1,21 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV64
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV64
-
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d,+zvl256b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d,+zvl256b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV64
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV64
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV64
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV64
 
 define void @fadd_v8f16(ptr %x, ptr %y) {
 ; ZVFH-LABEL: fadd_v8f16:
@@ -59,79 +53,42 @@ define void @fadd_v6f16(ptr %x, ptr %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fadd_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfadd.vv v8, v8, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fadd_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfadd.vv v8, v8, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fadd_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfadd.vv v8, v8, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fadd_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfadd.vv v8, v8, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fadd_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfadd.vv v8, v8, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fadd_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfadd.vv v8, v8, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = fadd <6 x half> %a, %b
@@ -222,79 +179,42 @@ define void @fsub_v6f16(ptr %x, ptr %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fsub_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfsub.vv v8, v8, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fsub_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfsub.vv v8, v8, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fsub_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfsub.vv v8, v8, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fsub_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfsub.vv v8, v8, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fsub_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfsub.vv v8, v8, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fsub_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfsub.vv v8, v8, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = fsub <6 x half> %a, %b
@@ -385,79 +305,42 @@ define void @fmul_v6f16(ptr %x, ptr %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fmul_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmul.vv v8, v8, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fmul_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmul.vv v8, v8, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fmul_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmul.vv v8, v8, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fmul_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmul.vv v8, v8, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fmul_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmul.vv v8, v8, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fmul_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmul.vv v8, v8, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = fmul <6 x half> %a, %b
@@ -548,79 +431,42 @@ define void @fdiv_v6f16(ptr %x, ptr %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fdiv_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfdiv.vv v8, v8, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fdiv_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfdiv.vv v8, v8, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fdiv_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfdiv.vv v8, v8, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fdiv_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfdiv.vv v8, v8, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fdiv_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfdiv.vv v8, v8, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fdiv_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfdiv.vv v8, v8, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = fdiv <6 x half> %a, %b
@@ -706,71 +552,38 @@ define void @fneg_v6f16(ptr %x) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fneg_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfneg.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fneg_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfneg.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fneg_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfneg.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fneg_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfneg.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fneg_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfneg.v v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fneg_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfneg.v v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = fneg <6 x half> %a
   store <6 x half> %b, ptr %x
@@ -851,71 +664,38 @@ define void @fabs_v6f16(ptr %x) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fabs_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfabs.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fabs_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfabs.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fabs_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfabs.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fabs_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfabs.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fabs_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fabs_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a)
   store <6 x half> %b, ptr %x
@@ -1004,79 +784,42 @@ define void @copysign_v6f16(ptr %x, ptr %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: copysign_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfsgnj.vv v8, v8, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: copysign_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfsgnj.vv v8, v8, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: copysign_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v8, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: copysign_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v8, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: copysign_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v8, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: copysign_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v8, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %b)
@@ -1174,99 +917,52 @@ define void @copysign_vf_v6f16(ptr %x, half %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: copysign_vf_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfsgnj.vv v8, v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: copysign_vf_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfsgnj.vv v8, v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: copysign_vf_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: copysign_vf_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: copysign_vf_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: copysign_vf_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
   %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -1363,99 +1059,52 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: copysign_neg_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfneg.v v8, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfsgnj.vv v8, v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: copysign_neg_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfneg.v v8, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfsgnj.vv v8, v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: copysign_neg_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfneg.v v8, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: copysign_neg_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfneg.v v8, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: copysign_neg_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfneg.v v8, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: copysign_neg_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfneg.v v8, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = fneg <6 x half> %b
@@ -1560,133 +1209,69 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi sp, sp, -16
-; ZVFHMINLMULMAX2-RV32-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 3, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle32.v v9, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfneg.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfsgnj.vv v8, v10, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, sp, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMINLMULMAX2-RV32-NEXT:    fsh fa5, 4(a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi sp, sp, 16
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi sp, sp, -16
-; ZVFHMINLMULMAX2-RV64-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    mv a2, sp
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v8, (a2)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a2)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 3, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle32.v v9, (a1)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfneg.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfsgnj.vv v8, v10, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a1, sp, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse16.v v9, (a1)
-; ZVFHMINLMULMAX2-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMINLMULMAX2-RV64-NEXT:    fsh fa5, 4(a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi sp, sp, 16
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi sp, sp, -16
-; ZVFHMINLMULMAX1-RV32-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 3, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle32.v v9, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfneg.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v10, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, sp, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMINLMULMAX1-RV32-NEXT:    fsh fa5, 4(a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi sp, sp, 16
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi sp, sp, -16
-; ZVFHMINLMULMAX1-RV64-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    mv a2, sp
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v8, (a2)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a2)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 3, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle32.v v9, (a1)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfneg.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v10, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a1, sp, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse16.v v9, (a1)
-; ZVFHMINLMULMAX1-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMINLMULMAX1-RV64-NEXT:    fsh fa5, 4(a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi sp, sp, 16
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    addi sp, sp, -16
+; ZVFHMIN-RV32-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 3, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle32.v v9, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfneg.v v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v10, v8
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    addi a1, sp, 8
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-RV32-NEXT:    fsh fa5, 4(a0)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse32.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    addi sp, sp, 16
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    addi sp, sp, -16
+; ZVFHMIN-RV64-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle64.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    mv a2, sp
+; ZVFHMIN-RV64-NEXT:    vse64.v v8, (a2)
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a2)
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 3, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle32.v v9, (a1)
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfneg.v v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v10, v8
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    addi a1, sp, 8
+; ZVFHMIN-RV64-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-RV64-NEXT:    fsh fa5, 4(a0)
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse32.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    addi sp, sp, 16
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <3 x half>, ptr %x
   %b = load <3 x float>, ptr %y
   %c = fneg <3 x float> %b
@@ -1755,71 +1340,38 @@ define void @sqrt_v6f16(ptr %x) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: sqrt_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfsqrt.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: sqrt_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfsqrt.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: sqrt_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfsqrt.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: sqrt_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfsqrt.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: sqrt_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfsqrt.v v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: sqrt_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfsqrt.v v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.sqrt.v6f16(<6 x half> %a)
   store <6 x half> %b, ptr %x
@@ -1913,87 +1465,46 @@ define void @fma_v6f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFH-NEXT:    vse16.v v10, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fma_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a2)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v10, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmadd.vv v9, v8, v11
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v9, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fma_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a2)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v10, (a1)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmadd.vv v9, v8, v11
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fma_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a2)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v10, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmadd.vv v9, v8, v11
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fma_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a2)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v10, (a1)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmadd.vv v9, v8, v11
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fma_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a2)
+; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v11, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmadd.vv v9, v8, v11
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v9, v8, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v9, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fma_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a2)
+; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v11, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmadd.vv v9, v8, v11
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = load <6 x half>, ptr %z
@@ -2104,107 +1615,56 @@ define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFH-NEXT:    vse16.v v10, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fmsub_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a2)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v10, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfneg.v v8, v11
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v11, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v10, v11
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmacc.vv v10, v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v8, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v9, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fmsub_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a2)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v10, (a1)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfneg.v v8, v11
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v11, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v10, v11
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmacc.vv v10, v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v8, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fmsub_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a2)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v10, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfneg.v v8, v11
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v11, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v10, v11
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmacc.vv v10, v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v8, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fmsub_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a2)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v10, (a1)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfneg.v v8, v11
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v11, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v10, v11
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmacc.vv v10, v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v8, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fmsub_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a2)
+; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v11, v8
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfneg.v v8, v11
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v11, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmacc.vv v10, v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v9, v8, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v9, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fmsub_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a2)
+; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v11, v8
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfneg.v v8, v11
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v11, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmacc.vv v10, v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = load <6 x half>, ptr %z
@@ -2264,58 +1724,28 @@ define void @fnmadd_v2f64(ptr %x, ptr %y, ptr %z) {
 }
 
 define void @fadd_v16f16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: fadd_v16f16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vfadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: fadd_v16f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vfadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vfadd.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fadd_v16f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vfadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vfadd.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-LABEL: fadd_v16f16:
-; ZVFHMINLMULMAX2:       # %bb.0:
-; ZVFHMINLMULMAX2-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
-; ZVFHMINLMULMAX2-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX2-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMINLMULMAX2-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMINLMULMAX2-NEXT:    vfadd.vv v8, v12, v10
-; ZVFHMINLMULMAX2-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMINLMULMAX2-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMINLMULMAX2-NEXT:    vse16.v v10, (a0)
-; ZVFHMINLMULMAX2-NEXT:    ret
+; ZVFH-LABEL: fadd_v16f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0)
+; ZVFH-NEXT:    vle16.v v10, (a1)
+; ZVFH-NEXT:    vfadd.vv v8, v8, v10
+; ZVFH-NEXT:    vse16.v v8, (a0)
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: fadd_v16f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = load <16 x half>, ptr %y
   %c = fadd <16 x half> %a, %b
@@ -2324,44 +1754,14 @@ define void @fadd_v16f16(ptr %x, ptr %y) {
 }
 
 define void @fadd_v8f32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: fadd_v8f32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vfadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: fadd_v8f32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vfadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vfadd.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fadd_v8f32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vfadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vfadd.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; ZVFH-LABEL: fadd_v8f32:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; ZVFH-NEXT:    vle32.v v8, (a0)
+; ZVFH-NEXT:    vle32.v v10, (a1)
+; ZVFH-NEXT:    vfadd.vv v8, v8, v10
+; ZVFH-NEXT:    vse32.v v8, (a0)
+; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fadd_v8f32:
 ; ZVFHMIN:       # %bb.0:
@@ -2379,44 +1779,14 @@ define void @fadd_v8f32(ptr %x, ptr %y) {
 }
 
 define void @fadd_v4f64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: fadd_v4f64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vfadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: fadd_v4f64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vfadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vfadd.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fadd_v4f64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vfadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vfadd.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; ZVFH-LABEL: fadd_v4f64:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVFH-NEXT:    vle64.v v8, (a0)
+; ZVFH-NEXT:    vle64.v v10, (a1)
+; ZVFH-NEXT:    vfadd.vv v8, v8, v10
+; ZVFH-NEXT:    vse64.v v8, (a0)
+; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fadd_v4f64:
 ; ZVFHMIN:       # %bb.0:
@@ -2434,58 +1804,28 @@ define void @fadd_v4f64(ptr %x, ptr %y) {
 }
 
 define void @fsub_v16f16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: fsub_v16f16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vfsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: fsub_v16f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vfsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vfsub.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fsub_v16f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vfsub.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vfsub.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-LABEL: fsub_v16f16:
-; ZVFHMINLMULMAX2:       # %bb.0:
-; ZVFHMINLMULMAX2-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
-; ZVFHMINLMULMAX2-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX2-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMINLMULMAX2-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMINLMULMAX2-NEXT:    vfsub.vv v8, v12, v10
-; ZVFHMINLMULMAX2-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMINLMULMAX2-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMINLMULMAX2-NEXT:    vse16.v v10, (a0)
-; ZVFHMINLMULMAX2-NEXT:    ret
+; ZVFH-LABEL: fsub_v16f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0)
+; ZVFH-NEXT:    vle16.v v10, (a1)
+; ZVFH-NEXT:    vfsub.vv v8, v8, v10
+; ZVFH-NEXT:    vse16.v v8, (a0)
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: fsub_v16f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = load <16 x half>, ptr %y
   %c = fsub <16 x half> %a, %b
@@ -2494,44 +1834,14 @@ define void @fsub_v16f16(ptr %x, ptr %y) {
 }
 
 define void @fsub_v8f32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: fsub_v8f32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vfsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: fsub_v8f32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vfsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vfsub.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fsub_v8f32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vfsub.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vfsub.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; ZVFH-LABEL: fsub_v8f32:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; ZVFH-NEXT:    vle32.v v8, (a0)
+; ZVFH-NEXT:    vle32.v v10, (a1)
+; ZVFH-NEXT:    vfsub.vv v8, v8, v10
+; ZVFH-NEXT:    vse32.v v8, (a0)
+; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fsub_v8f32:
 ; ZVFHMIN:       # %bb.0:
@@ -2549,44 +1859,14 @@ define void @fsub_v8f32(ptr %x, ptr %y) {
 }
 
 define void @fsub_v4f64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: fsub_v4f64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vfsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: fsub_v4f64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vfsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vfsub.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fsub_v4f64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vfsub.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vfsub.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; ZVFH-LABEL: fsub_v4f64:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVFH-NEXT:    vle64.v v8, (a0)
+; ZVFH-NEXT:    vle64.v v10, (a1)
+; ZVFH-NEXT:    vfsub.vv v8, v8, v10
+; ZVFH-NEXT:    vse64.v v8, (a0)
+; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fsub_v4f64:
 ; ZVFHMIN:       # %bb.0:
@@ -2604,58 +1884,28 @@ define void @fsub_v4f64(ptr %x, ptr %y) {
 }
 
 define void @fmul_v16f16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: fmul_v16f16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vfmul.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: fmul_v16f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vfmul.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vfmul.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fmul_v16f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vfmul.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vfmul.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-LABEL: fmul_v16f16:
-; ZVFHMINLMULMAX2:       # %bb.0:
-; ZVFHMINLMULMAX2-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
-; ZVFHMINLMULMAX2-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX2-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMINLMULMAX2-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMINLMULMAX2-NEXT:    vfmul.vv v8, v12, v10
-; ZVFHMINLMULMAX2-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMINLMULMAX2-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMINLMULMAX2-NEXT:    vse16.v v10, (a0)
-; ZVFHMINLMULMAX2-NEXT:    ret
+; ZVFH-LABEL: fmul_v16f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0)
+; ZVFH-NEXT:    vle16.v v10, (a1)
+; ZVFH-NEXT:    vfmul.vv v8, v8, v10
+; ZVFH-NEXT:    vse16.v v8, (a0)
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: fmul_v16f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = load <16 x half>, ptr %y
   %c = fmul <16 x half> %a, %b
@@ -2664,44 +1914,14 @@ define void @fmul_v16f16(ptr %x, ptr %y) {
 }
 
 define void @fmul_v8f32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: fmul_v8f32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vfmul.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: fmul_v8f32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vfmul.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vfmul.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fmul_v8f32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vfmul.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vfmul.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; ZVFH-LABEL: fmul_v8f32:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; ZVFH-NEXT:    vle32.v v8, (a0)
+; ZVFH-NEXT:    vle32.v v10, (a1)
+; ZVFH-NEXT:    vfmul.vv v8, v8, v10
+; ZVFH-NEXT:    vse32.v v8, (a0)
+; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fmul_v8f32:
 ; ZVFHMIN:       # %bb.0:
@@ -2719,44 +1939,14 @@ define void @fmul_v8f32(ptr %x, ptr %y) {
 }
 
 define void @fmul_v4f64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: fmul_v4f64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vfmul.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: fmul_v4f64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vfmul.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vfmul.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fmul_v4f64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vfmul.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vfmul.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; ZVFH-LABEL: fmul_v4f64:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVFH-NEXT:    vle64.v v8, (a0)
+; ZVFH-NEXT:    vle64.v v10, (a1)
+; ZVFH-NEXT:    vfmul.vv v8, v8, v10
+; ZVFH-NEXT:    vse64.v v8, (a0)
+; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fmul_v4f64:
 ; ZVFHMIN:       # %bb.0:
@@ -2774,58 +1964,28 @@ define void @fmul_v4f64(ptr %x, ptr %y) {
 }
 
 define void @fdiv_v16f16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: fdiv_v16f16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vfdiv.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: fdiv_v16f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vfdiv.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vfdiv.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fdiv_v16f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vfdiv.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vfdiv.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-LABEL: fdiv_v16f16:
-; ZVFHMINLMULMAX2:       # %bb.0:
-; ZVFHMINLMULMAX2-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
-; ZVFHMINLMULMAX2-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX2-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMINLMULMAX2-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMINLMULMAX2-NEXT:    vfdiv.vv v8, v12, v10
-; ZVFHMINLMULMAX2-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMINLMULMAX2-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMINLMULMAX2-NEXT:    vse16.v v10, (a0)
-; ZVFHMINLMULMAX2-NEXT:    ret
+; ZVFH-LABEL: fdiv_v16f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0)
+; ZVFH-NEXT:    vle16.v v10, (a1)
+; ZVFH-NEXT:    vfdiv.vv v8, v8, v10
+; ZVFH-NEXT:    vse16.v v8, (a0)
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: fdiv_v16f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = load <16 x half>, ptr %y
   %c = fdiv <16 x half> %a, %b
@@ -2834,44 +1994,14 @@ define void @fdiv_v16f16(ptr %x, ptr %y) {
 }
 
 define void @fdiv_v8f32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: fdiv_v8f32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vfdiv.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: fdiv_v8f32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vfdiv.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vfdiv.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fdiv_v8f32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vfdiv.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vfdiv.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; ZVFH-LABEL: fdiv_v8f32:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; ZVFH-NEXT:    vle32.v v8, (a0)
+; ZVFH-NEXT:    vle32.v v10, (a1)
+; ZVFH-NEXT:    vfdiv.vv v8, v8, v10
+; ZVFH-NEXT:    vse32.v v8, (a0)
+; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fdiv_v8f32:
 ; ZVFHMIN:       # %bb.0:
@@ -2889,44 +2019,14 @@ define void @fdiv_v8f32(ptr %x, ptr %y) {
 }
 
 define void @fdiv_v4f64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: fdiv_v4f64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vfdiv.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: fdiv_v4f64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vfdiv.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vfdiv.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fdiv_v4f64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vfdiv.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vfdiv.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; ZVFH-LABEL: fdiv_v4f64:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVFH-NEXT:    vle64.v v8, (a0)
+; ZVFH-NEXT:    vle64.v v10, (a1)
+; ZVFH-NEXT:    vfdiv.vv v8, v8, v10
+; ZVFH-NEXT:    vse64.v v8, (a0)
+; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fdiv_v4f64:
 ; ZVFHMIN:       # %bb.0:
@@ -2944,37 +2044,25 @@ define void @fdiv_v4f64(ptr %x, ptr %y) {
 }
 
 define void @fneg_v16f16(ptr %x) {
-; LMULMAX2-LABEL: fneg_v16f16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vfneg.v v8, v8
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: fneg_v16f16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-NEXT:    vfneg.v v8, v8
-; LMULMAX1-NEXT:    vfneg.v v9, v9
-; LMULMAX1-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-LABEL: fneg_v16f16:
-; ZVFHMINLMULMAX2:       # %bb.0:
-; ZVFHMINLMULMAX2-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
-; ZVFHMINLMULMAX2-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMINLMULMAX2-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMINLMULMAX2-NEXT:    vfneg.v v8, v10
-; ZVFHMINLMULMAX2-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMINLMULMAX2-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMINLMULMAX2-NEXT:    vse16.v v10, (a0)
-; ZVFHMINLMULMAX2-NEXT:    ret
+; ZVFH-LABEL: fneg_v16f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0)
+; ZVFH-NEXT:    vfneg.v v8, v8
+; ZVFH-NEXT:    vse16.v v8, (a0)
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: fneg_v16f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfneg.v v8, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = fneg <16 x half> %a
   store <16 x half> %b, ptr %x
@@ -2982,25 +2070,13 @@ define void @fneg_v16f16(ptr %x) {
 }
 
 define void @fneg_v8f32(ptr %x) {
-; LMULMAX2-LABEL: fneg_v8f32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vfneg.v v8, v8
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: fneg_v8f32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle32.v v8, (a1)
-; LMULMAX1-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-NEXT:    vfneg.v v8, v8
-; LMULMAX1-NEXT:    vfneg.v v9, v9
-; LMULMAX1-NEXT:    vse32.v v9, (a0)
-; LMULMAX1-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-NEXT:    ret
+; ZVFH-LABEL: fneg_v8f32:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; ZVFH-NEXT:    vle32.v v8, (a0)
+; ZVFH-NEXT:    vfneg.v v8, v8
+; ZVFH-NEXT:    vse32.v v8, (a0)
+; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fneg_v8f32:
 ; ZVFHMIN:       # %bb.0:
@@ -3016,25 +2092,13 @@ define void @fneg_v8f32(ptr %x) {
 }
 
 define void @fneg_v4f64(ptr %x) {
-; LMULMAX2-LABEL: fneg_v4f64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vfneg.v v8, v8
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: fneg_v4f64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle64.v v8, (a1)
-; LMULMAX1-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-NEXT:    vfneg.v v8, v8
-; LMULMAX1-NEXT:    vfneg.v v9, v9
-; LMULMAX1-NEXT:    vse64.v v9, (a0)
-; LMULMAX1-NEXT:    vse64.v v8, (a1)
-; LMULMAX1-NEXT:    ret
+; ZVFH-LABEL: fneg_v4f64:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVFH-NEXT:    vle64.v v8, (a0)
+; ZVFH-NEXT:    vfneg.v v8, v8
+; ZVFH-NEXT:    vse64.v v8, (a0)
+; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fneg_v4f64:
 ; ZVFHMIN:       # %bb.0:
@@ -3050,49 +2114,31 @@ define void @fneg_v4f64(ptr %x) {
 }
 
 define void @fma_v16f16(ptr %x, ptr %y, ptr %z) {
-; LMULMAX2-LABEL: fma_v16f16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vle16.v v12, (a2)
-; LMULMAX2-NEXT:    vfmacc.vv v12, v8, v10
-; LMULMAX2-NEXT:    vse16.v v12, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: fma_v16f16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-NEXT:    addi a3, a0, 16
-; LMULMAX1-NEXT:    vle16.v v9, (a3)
-; LMULMAX1-NEXT:    vle16.v v10, (a1)
-; LMULMAX1-NEXT:    addi a1, a1, 16
-; LMULMAX1-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-NEXT:    addi a1, a2, 16
-; LMULMAX1-NEXT:    vle16.v v12, (a1)
-; LMULMAX1-NEXT:    vle16.v v13, (a2)
-; LMULMAX1-NEXT:    vfmacc.vv v12, v9, v11
-; LMULMAX1-NEXT:    vfmacc.vv v13, v8, v10
-; LMULMAX1-NEXT:    vse16.v v13, (a0)
-; LMULMAX1-NEXT:    vse16.v v12, (a3)
-; LMULMAX1-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-LABEL: fma_v16f16:
-; ZVFHMINLMULMAX2:       # %bb.0:
-; ZVFHMINLMULMAX2-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
-; ZVFHMINLMULMAX2-NEXT:    vle16.v v8, (a2)
-; ZVFHMINLMULMAX2-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-NEXT:    vle16.v v10, (a1)
-; ZVFHMINLMULMAX2-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMINLMULMAX2-NEXT:    vfwcvt.f.f.v v14, v9
-; ZVFHMINLMULMAX2-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX2-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMINLMULMAX2-NEXT:    vfmadd.vv v8, v14, v12
-; ZVFHMINLMULMAX2-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMINLMULMAX2-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMINLMULMAX2-NEXT:    vse16.v v10, (a0)
-; ZVFHMINLMULMAX2-NEXT:    ret
+; ZVFH-LABEL: fma_v16f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0)
+; ZVFH-NEXT:    vle16.v v10, (a1)
+; ZVFH-NEXT:    vle16.v v12, (a2)
+; ZVFH-NEXT:    vfmacc.vv v12, v8, v10
+; ZVFH-NEXT:    vse16.v v12, (a0)
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: fma_v16f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a2)
+; ZVFHMIN-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-NEXT:    vle16.v v10, (a1)
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT:    vse16.v v10, (a0)
+; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = load <16 x half>, ptr %y
   %c = load <16 x half>, ptr %z
@@ -3103,33 +2149,15 @@ define void @fma_v16f16(ptr %x, ptr %y, ptr %z) {
 declare <16 x half> @llvm.fma.v16f16(<16 x half>, <16 x half>, <16 x half>)
 
 define void @fma_v8f32(ptr %x, ptr %y, ptr %z) {
-; LMULMAX2-LABEL: fma_v8f32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vle32.v v12, (a2)
-; LMULMAX2-NEXT:    vfmacc.vv v12, v8, v10
-; LMULMAX2-NEXT:    vse32.v v12, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: fma_v8f32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a3, a0, 16
-; LMULMAX1-NEXT:    vle32.v v9, (a3)
-; LMULMAX1-NEXT:    vle32.v v10, (a1)
-; LMULMAX1-NEXT:    addi a1, a1, 16
-; LMULMAX1-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-NEXT:    addi a1, a2, 16
-; LMULMAX1-NEXT:    vle32.v v12, (a1)
-; LMULMAX1-NEXT:    vle32.v v13, (a2)
-; LMULMAX1-NEXT:    vfmacc.vv v12, v9, v11
-; LMULMAX1-NEXT:    vfmacc.vv v13, v8, v10
-; LMULMAX1-NEXT:    vse32.v v13, (a0)
-; LMULMAX1-NEXT:    vse32.v v12, (a3)
-; LMULMAX1-NEXT:    ret
+; ZVFH-LABEL: fma_v8f32:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; ZVFH-NEXT:    vle32.v v8, (a0)
+; ZVFH-NEXT:    vle32.v v10, (a1)
+; ZVFH-NEXT:    vle32.v v12, (a2)
+; ZVFH-NEXT:    vfmacc.vv v12, v8, v10
+; ZVFH-NEXT:    vse32.v v12, (a0)
+; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fma_v8f32:
 ; ZVFHMIN:       # %bb.0:
@@ -3150,33 +2178,15 @@ define void @fma_v8f32(ptr %x, ptr %y, ptr %z) {
 declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
 
 define void @fma_v4f64(ptr %x, ptr %y, ptr %z) {
-; LMULMAX2-LABEL: fma_v4f64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vle64.v v12, (a2)
-; LMULMAX2-NEXT:    vfmacc.vv v12, v8, v10
-; LMULMAX2-NEXT:    vse64.v v12, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: fma_v4f64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-NEXT:    addi a3, a0, 16
-; LMULMAX1-NEXT:    vle64.v v9, (a3)
-; LMULMAX1-NEXT:    vle64.v v10, (a1)
-; LMULMAX1-NEXT:    addi a1, a1, 16
-; LMULMAX1-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-NEXT:    addi a1, a2, 16
-; LMULMAX1-NEXT:    vle64.v v12, (a1)
-; LMULMAX1-NEXT:    vle64.v v13, (a2)
-; LMULMAX1-NEXT:    vfmacc.vv v12, v9, v11
-; LMULMAX1-NEXT:    vfmacc.vv v13, v8, v10
-; LMULMAX1-NEXT:    vse64.v v13, (a0)
-; LMULMAX1-NEXT:    vse64.v v12, (a3)
-; LMULMAX1-NEXT:    ret
+; ZVFH-LABEL: fma_v4f64:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVFH-NEXT:    vle64.v v8, (a0)
+; ZVFH-NEXT:    vle64.v v10, (a1)
+; ZVFH-NEXT:    vle64.v v12, (a2)
+; ZVFH-NEXT:    vfmacc.vv v12, v8, v10
+; ZVFH-NEXT:    vse64.v v12, (a0)
+; ZVFH-NEXT:    ret
 ;
 ; ZVFHMIN-LABEL: fma_v4f64:
 ; ZVFHMIN:       # %bb.0:
@@ -3242,99 +2252,52 @@ define void @fadd_vf_v6f16(ptr %x, half %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fadd_vf_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfadd.vv v8, v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fadd_vf_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfadd.vv v8, v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fadd_vf_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfadd.vv v8, v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fadd_vf_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfadd.vv v8, v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fadd_vf_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfadd.vv v8, v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fadd_vf_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfadd.vv v8, v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
   %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -3429,99 +2392,52 @@ define void @fadd_fv_v6f16(ptr %x, half %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fadd_fv_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfadd.vv v8, v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fadd_fv_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfadd.vv v8, v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fadd_fv_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfadd.vv v8, v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fadd_fv_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfadd.vv v8, v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fadd_fv_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfadd.vv v8, v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fadd_fv_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfadd.vv v8, v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
   %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -3616,99 +2532,52 @@ define void @fsub_vf_v6f16(ptr %x, half %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fsub_vf_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfsub.vv v8, v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fsub_vf_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfsub.vv v8, v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fsub_vf_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfsub.vv v8, v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fsub_vf_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfsub.vv v8, v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fsub_vf_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfsub.vv v8, v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fsub_vf_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfsub.vv v8, v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
   %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -3803,99 +2672,52 @@ define void @fsub_fv_v6f16(ptr %x, half %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fsub_fv_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfsub.vv v8, v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fsub_fv_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfsub.vv v8, v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fsub_fv_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfsub.vv v8, v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fsub_fv_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfsub.vv v8, v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fsub_fv_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfsub.vv v8, v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fsub_fv_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfsub.vv v8, v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
   %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -3990,99 +2812,52 @@ define void @fmul_vf_v6f16(ptr %x, half %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fmul_vf_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmul.vv v8, v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fmul_vf_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmul.vv v8, v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fmul_vf_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmul.vv v8, v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fmul_vf_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmul.vv v8, v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fmul_vf_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmul.vv v8, v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fmul_vf_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmul.vv v8, v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
   %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -4177,99 +2952,52 @@ define void @fmul_fv_v6f16(ptr %x, half %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fmul_fv_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmul.vv v8, v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fmul_fv_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmul.vv v8, v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fmul_fv_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmul.vv v8, v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fmul_fv_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmul.vv v8, v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fmul_fv_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmul.vv v8, v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fmul_fv_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmul.vv v8, v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
   %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -4364,99 +3092,52 @@ define void @fdiv_vf_v6f16(ptr %x, half %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fdiv_vf_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfdiv.vv v8, v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fdiv_vf_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfdiv.vv v8, v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fdiv_vf_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfdiv.vv v8, v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fdiv_vf_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfdiv.vv v8, v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fdiv_vf_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfdiv.vv v8, v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fdiv_vf_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfdiv.vv v8, v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
   %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -4551,99 +3232,52 @@ define void @fdiv_fv_v6f16(ptr %x, half %y) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fdiv_fv_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfdiv.vv v8, v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fdiv_fv_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfdiv.vv v8, v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fdiv_fv_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfdiv.vv v8, v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fdiv_fv_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmv.v.f v9, fa5
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfdiv.vv v8, v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fdiv_fv_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfdiv.vv v8, v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fdiv_fv_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmv.v.f v9, fa5
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfdiv.vv v8, v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
   %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -4743,107 +3377,56 @@ define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) {
 ; ZVFH-NEXT:    vse16.v v9, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fma_vf_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v9, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmv.v.f v10, fa5
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v11, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmadd.vv v8, v9, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fma_vf_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v9, (a1)
-; ZVFHMINLMULMAX2-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmv.v.f v10, fa5
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v11, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmadd.vv v8, v9, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fma_vf_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v9, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmv.v.f v10, fa5
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v11, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmadd.vv v8, v9, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fma_vf_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v9, (a1)
-; ZVFHMINLMULMAX1-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmv.v.f v10, fa5
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v11, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmadd.vv v8, v9, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fma_vf_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmv.v.f v10, fa5
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v11, v10
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmadd.vv v8, v9, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fma_vf_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmv.v.f v10, fa5
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v11, v10
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmadd.vv v8, v9, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = insertelement <6 x half> poison, half %z, i32 0
@@ -4949,107 +3532,56 @@ define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) {
 ; ZVFH-NEXT:    vse16.v v9, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fma_fv_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v9, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmv.v.f v10, fa5
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v11, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmadd.vv v8, v9, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fma_fv_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v9, (a1)
-; ZVFHMINLMULMAX2-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmv.v.f v10, fa5
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v11, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmadd.vv v8, v9, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fma_fv_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v9, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmv.v.f v10, fa5
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v11, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmadd.vv v8, v9, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fma_fv_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v9, (a1)
-; ZVFHMINLMULMAX1-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmv.v.f v10, fa5
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v11, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmadd.vv v8, v9, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fma_fv_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmv.v.f v10, fa5
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v11, v10
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmadd.vv v8, v9, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fma_fv_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmv.v.f v10, fa5
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v11, v10
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmadd.vv v8, v9, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = insertelement <6 x half> poison, half %z, i32 0
@@ -5161,127 +3693,66 @@ define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
 ; ZVFH-NEXT:    vse16.v v9, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fmsub_vf_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v9, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmv.v.f v10, fa5
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v11, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfneg.v v9, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v11, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmacc.vv v11, v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v8, v11
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v9, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fmsub_vf_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v9, (a1)
-; ZVFHMINLMULMAX2-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmv.v.f v10, fa5
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v11, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfneg.v v9, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v11, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmacc.vv v11, v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v8, v11
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fmsub_vf_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v9, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmv.v.f v10, fa5
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v11, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfneg.v v9, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v11, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmacc.vv v11, v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v8, v11
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fmsub_vf_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v9, (a1)
-; ZVFHMINLMULMAX1-RV64-NEXT:    fcvt.s.h fa5, fa0
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmv.v.f v10, fa5
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v11, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfneg.v v9, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v11, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmacc.vv v11, v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v8, v11
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fmsub_vf_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmv.v.f v10, fa5
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v11, v10
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfneg.v v9, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v11, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmacc.vv v11, v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v8, v11
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v9, v8, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v9, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fmsub_vf_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmv.v.f v10, fa5
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v11, v10
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfneg.v v9, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v11, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmacc.vv v11, v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v8, v11
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = insertelement <6 x half> poison, half %z, i32 0
@@ -5558,107 +4029,56 @@ define void @ceil_v6f16(ptr %x) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: ceil_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfabs.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    lui a1, 307200
-; ZVFHMINLMULMAX2-RV32-NEXT:    fmv.w.x fa5, a1
-; ZVFHMINLMULMAX2-RV32-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMINLMULMAX2-RV32-NEXT:    fsrmi a1, 3
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfcvt.x.f.v v8, v9, v0.t
-; ZVFHMINLMULMAX2-RV32-NEXT:    fsrm a1
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v9, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: ceil_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfabs.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    lui a1, 307200
-; ZVFHMINLMULMAX2-RV64-NEXT:    fmv.w.x fa5, a1
-; ZVFHMINLMULMAX2-RV64-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMINLMULMAX2-RV64-NEXT:    fsrmi a1, 3
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfcvt.x.f.v v8, v9, v0.t
-; ZVFHMINLMULMAX2-RV64-NEXT:    fsrm a1
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: ceil_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfabs.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    lui a1, 307200
-; ZVFHMINLMULMAX1-RV32-NEXT:    fmv.w.x fa5, a1
-; ZVFHMINLMULMAX1-RV32-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMINLMULMAX1-RV32-NEXT:    fsrmi a1, 3
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfcvt.x.f.v v8, v9, v0.t
-; ZVFHMINLMULMAX1-RV32-NEXT:    fsrm a1
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: ceil_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfabs.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    lui a1, 307200
-; ZVFHMINLMULMAX1-RV64-NEXT:    fmv.w.x fa5, a1
-; ZVFHMINLMULMAX1-RV64-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMINLMULMAX1-RV64-NEXT:    fsrmi a1, 3
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfcvt.x.f.v v8, v9, v0.t
-; ZVFHMINLMULMAX1-RV64-NEXT:    fsrm a1
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: ceil_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v8, v9
+; ZVFHMIN-RV32-NEXT:    lui a1, 307200
+; ZVFHMIN-RV32-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v8, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a1, 3
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a1
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v9, v8, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v9, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: ceil_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v8, v9
+; ZVFHMIN-RV64-NEXT:    lui a1, 307200
+; ZVFHMIN-RV64-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v8, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a1, 3
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a1
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.ceil.v6f16(<6 x half> %a)
   store <6 x half> %b, ptr %x
@@ -5796,107 +4216,56 @@ define void @floor_v6f16(ptr %x) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: floor_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfabs.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    lui a1, 307200
-; ZVFHMINLMULMAX2-RV32-NEXT:    fmv.w.x fa5, a1
-; ZVFHMINLMULMAX2-RV32-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMINLMULMAX2-RV32-NEXT:    fsrmi a1, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfcvt.x.f.v v8, v9, v0.t
-; ZVFHMINLMULMAX2-RV32-NEXT:    fsrm a1
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v9, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: floor_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfabs.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    lui a1, 307200
-; ZVFHMINLMULMAX2-RV64-NEXT:    fmv.w.x fa5, a1
-; ZVFHMINLMULMAX2-RV64-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMINLMULMAX2-RV64-NEXT:    fsrmi a1, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfcvt.x.f.v v8, v9, v0.t
-; ZVFHMINLMULMAX2-RV64-NEXT:    fsrm a1
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: floor_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfabs.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    lui a1, 307200
-; ZVFHMINLMULMAX1-RV32-NEXT:    fmv.w.x fa5, a1
-; ZVFHMINLMULMAX1-RV32-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMINLMULMAX1-RV32-NEXT:    fsrmi a1, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfcvt.x.f.v v8, v9, v0.t
-; ZVFHMINLMULMAX1-RV32-NEXT:    fsrm a1
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: floor_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfabs.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    lui a1, 307200
-; ZVFHMINLMULMAX1-RV64-NEXT:    fmv.w.x fa5, a1
-; ZVFHMINLMULMAX1-RV64-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMINLMULMAX1-RV64-NEXT:    fsrmi a1, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfcvt.x.f.v v8, v9, v0.t
-; ZVFHMINLMULMAX1-RV64-NEXT:    fsrm a1
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: floor_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v8, v9
+; ZVFHMIN-RV32-NEXT:    lui a1, 307200
+; ZVFHMIN-RV32-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v8, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a1, 2
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a1
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v9, v8, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v9, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: floor_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v8, v9
+; ZVFHMIN-RV64-NEXT:    lui a1, 307200
+; ZVFHMIN-RV64-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v8, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a1, 2
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a1
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.floor.v6f16(<6 x half> %a)
   store <6 x half> %b, ptr %x
@@ -6034,107 +4403,56 @@ define void @round_v6f16(ptr %x) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: round_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfabs.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    lui a1, 307200
-; ZVFHMINLMULMAX2-RV32-NEXT:    fmv.w.x fa5, a1
-; ZVFHMINLMULMAX2-RV32-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMINLMULMAX2-RV32-NEXT:    fsrmi a1, 4
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfcvt.x.f.v v8, v9, v0.t
-; ZVFHMINLMULMAX2-RV32-NEXT:    fsrm a1
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v9, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: round_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfabs.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    lui a1, 307200
-; ZVFHMINLMULMAX2-RV64-NEXT:    fmv.w.x fa5, a1
-; ZVFHMINLMULMAX2-RV64-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMINLMULMAX2-RV64-NEXT:    fsrmi a1, 4
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfcvt.x.f.v v8, v9, v0.t
-; ZVFHMINLMULMAX2-RV64-NEXT:    fsrm a1
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: round_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfabs.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    lui a1, 307200
-; ZVFHMINLMULMAX1-RV32-NEXT:    fmv.w.x fa5, a1
-; ZVFHMINLMULMAX1-RV32-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMINLMULMAX1-RV32-NEXT:    fsrmi a1, 4
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfcvt.x.f.v v8, v9, v0.t
-; ZVFHMINLMULMAX1-RV32-NEXT:    fsrm a1
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: round_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfabs.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    lui a1, 307200
-; ZVFHMINLMULMAX1-RV64-NEXT:    fmv.w.x fa5, a1
-; ZVFHMINLMULMAX1-RV64-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMINLMULMAX1-RV64-NEXT:    fsrmi a1, 4
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfcvt.x.f.v v8, v9, v0.t
-; ZVFHMINLMULMAX1-RV64-NEXT:    fsrm a1
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: round_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfabs.v v8, v9
+; ZVFHMIN-RV32-NEXT:    lui a1, 307200
+; ZVFHMIN-RV32-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v8, fa5
+; ZVFHMIN-RV32-NEXT:    fsrmi a1, 4
+; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-RV32-NEXT:    fsrm a1
+; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v9, v8, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v9, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: round_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfabs.v v8, v9
+; ZVFHMIN-RV64-NEXT:    lui a1, 307200
+; ZVFHMIN-RV64-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v8, fa5
+; ZVFHMIN-RV64-NEXT:    fsrmi a1, 4
+; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-RV64-NEXT:    fsrm a1
+; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.round.v6f16(<6 x half> %a)
   store <6 x half> %b, ptr %x
@@ -6470,107 +4788,56 @@ define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFH-NEXT:    vse16.v v10, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fmuladd_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v10, (a2)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmul.vv v8, v8, v11
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfadd.vv v8, v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fmuladd_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v10, (a2)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmul.vv v8, v8, v11
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfadd.vv v8, v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fmuladd_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v10, (a2)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmul.vv v8, v8, v11
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfadd.vv v8, v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fmuladd_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmul.vv v8, v8, v11
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfadd.vv v8, v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fmuladd_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    vle16.v v10, (a2)
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v11, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmul.vv v8, v8, v11
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfadd.vv v8, v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fmuladd_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vle16.v v10, (a2)
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v11, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmul.vv v8, v8, v11
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfadd.vv v8, v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = load <6 x half>, ptr %z
@@ -6681,107 +4948,56 @@ define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFH-NEXT:    vse16.v v10, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMINLMULMAX2-RV32-LABEL: fmsub_fmuladd_v6f16:
-; ZVFHMINLMULMAX2-RV32:       # %bb.0:
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vle16.v v10, (a2)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfmul.vv v8, v8, v11
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfsub.vv v8, v8, v9
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX2-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX2-RV64-LABEL: fmsub_fmuladd_v6f16:
-; ZVFHMINLMULMAX2-RV64:       # %bb.0:
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vle16.v v10, (a2)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfmul.vv v8, v8, v11
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfsub.vv v8, v8, v9
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX2-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX2-RV64-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV32-LABEL: fmsub_fmuladd_v6f16:
-; ZVFHMINLMULMAX1-RV32:       # %bb.0:
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vle16.v v10, (a2)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfmul.vv v8, v8, v11
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfsub.vv v8, v8, v9
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; ZVFHMINLMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMINLMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV32-NEXT:    ret
-;
-; ZVFHMINLMULMAX1-RV64-LABEL: fmsub_fmuladd_v6f16:
-; ZVFHMINLMULMAX1-RV64:       # %bb.0:
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v8, (a1)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfmul.vv v8, v8, v11
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfwcvt.f.f.v v9, v10
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfsub.vv v8, v8, v9
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vfncvt.f.f.w v9, v8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMINLMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; ZVFHMINLMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; ZVFHMINLMULMAX1-RV64-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fmsub_fmuladd_v6f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    vle16.v v10, (a2)
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v11, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfmul.vv v8, v8, v11
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfsub.vv v8, v8, v9
+; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
+; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fmsub_fmuladd_v6f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a1)
+; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vle16.v v10, (a2)
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v11, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfmul.vv v8, v8, v11
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfsub.vv v8, v8, v9
+; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
+; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = load <6 x half>, ptr %z
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
index 44b96d076df455..dbc65620b7f249 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
@@ -1,12 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8RV32,LMULMAX8RV32ZVFH
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8RV64,LMULMAX8RV64ZVFH
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1RV32,LMULMAX1RV32ZVFH
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1RV64,LMULMAX1RV64ZVFH
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8RV32,LMULMAX8RV32ZVFHMIN
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8RV64,LMULMAX8RV64ZVFHMIN
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1RV32,LMULMAX1RV32ZVFHMIN
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1RV64,LMULMAX1RV64ZVFHMIN
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN64
 
 define void @fp2si_v2f32_v2i32(ptr %x, ptr %y) {
 ; CHECK-LABEL: fp2si_v2f32_v2i32:
@@ -128,194 +124,194 @@ define <3 x i1> @fp2si_v3f32_v3i1(<3 x float> %x) {
 
 ; FIXME: This is expanded when they could be widened + promoted
 define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
-; LMULMAX8RV32-LABEL: fp2si_v3f32_v3i15:
-; LMULMAX8RV32:       # %bb.0:
-; LMULMAX8RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX8RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; LMULMAX8RV32-NEXT:    vslidedown.vi v8, v9, 2
-; LMULMAX8RV32-NEXT:    vmv.x.s a1, v8
-; LMULMAX8RV32-NEXT:    slli a2, a1, 17
-; LMULMAX8RV32-NEXT:    srli a2, a2, 19
-; LMULMAX8RV32-NEXT:    sh a2, 4(a0)
-; LMULMAX8RV32-NEXT:    vmv.x.s a2, v9
-; LMULMAX8RV32-NEXT:    lui a3, 8
-; LMULMAX8RV32-NEXT:    addi a3, a3, -1
-; LMULMAX8RV32-NEXT:    and a2, a2, a3
-; LMULMAX8RV32-NEXT:    vslidedown.vi v8, v9, 1
-; LMULMAX8RV32-NEXT:    vmv.x.s a4, v8
-; LMULMAX8RV32-NEXT:    and a3, a4, a3
-; LMULMAX8RV32-NEXT:    slli a3, a3, 15
-; LMULMAX8RV32-NEXT:    slli a1, a1, 30
-; LMULMAX8RV32-NEXT:    or a1, a2, a1
-; LMULMAX8RV32-NEXT:    or a1, a1, a3
-; LMULMAX8RV32-NEXT:    sw a1, 0(a0)
-; LMULMAX8RV32-NEXT:    ret
+; ZVFH32-LABEL: fp2si_v3f32_v3i15:
+; ZVFH32:       # %bb.0:
+; ZVFH32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH32-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; ZVFH32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFH32-NEXT:    vmv.x.s a1, v8
+; ZVFH32-NEXT:    slli a2, a1, 17
+; ZVFH32-NEXT:    srli a2, a2, 19
+; ZVFH32-NEXT:    sh a2, 4(a0)
+; ZVFH32-NEXT:    vmv.x.s a2, v9
+; ZVFH32-NEXT:    lui a3, 8
+; ZVFH32-NEXT:    addi a3, a3, -1
+; ZVFH32-NEXT:    and a2, a2, a3
+; ZVFH32-NEXT:    vslidedown.vi v8, v9, 1
+; ZVFH32-NEXT:    vmv.x.s a4, v8
+; ZVFH32-NEXT:    and a3, a4, a3
+; ZVFH32-NEXT:    slli a3, a3, 15
+; ZVFH32-NEXT:    slli a1, a1, 30
+; ZVFH32-NEXT:    or a1, a2, a1
+; ZVFH32-NEXT:    or a1, a1, a3
+; ZVFH32-NEXT:    sw a1, 0(a0)
+; ZVFH32-NEXT:    ret
 ;
-; LMULMAX8RV64-LABEL: fp2si_v3f32_v3i15:
-; LMULMAX8RV64:       # %bb.0:
-; LMULMAX8RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX8RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; LMULMAX8RV64-NEXT:    vmv.x.s a1, v9
-; LMULMAX8RV64-NEXT:    lui a2, 8
-; LMULMAX8RV64-NEXT:    addiw a2, a2, -1
-; LMULMAX8RV64-NEXT:    and a1, a1, a2
-; LMULMAX8RV64-NEXT:    vslidedown.vi v8, v9, 1
-; LMULMAX8RV64-NEXT:    vmv.x.s a3, v8
-; LMULMAX8RV64-NEXT:    and a2, a3, a2
-; LMULMAX8RV64-NEXT:    slli a2, a2, 15
-; LMULMAX8RV64-NEXT:    vslidedown.vi v8, v9, 2
-; LMULMAX8RV64-NEXT:    vmv.x.s a3, v8
-; LMULMAX8RV64-NEXT:    slli a3, a3, 30
-; LMULMAX8RV64-NEXT:    or a1, a1, a3
-; LMULMAX8RV64-NEXT:    or a1, a1, a2
-; LMULMAX8RV64-NEXT:    sw a1, 0(a0)
-; LMULMAX8RV64-NEXT:    slli a1, a1, 19
-; LMULMAX8RV64-NEXT:    srli a1, a1, 51
-; LMULMAX8RV64-NEXT:    sh a1, 4(a0)
-; LMULMAX8RV64-NEXT:    ret
+; ZVFH64-LABEL: fp2si_v3f32_v3i15:
+; ZVFH64:       # %bb.0:
+; ZVFH64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH64-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; ZVFH64-NEXT:    vmv.x.s a1, v9
+; ZVFH64-NEXT:    lui a2, 8
+; ZVFH64-NEXT:    addiw a2, a2, -1
+; ZVFH64-NEXT:    and a1, a1, a2
+; ZVFH64-NEXT:    vslidedown.vi v8, v9, 1
+; ZVFH64-NEXT:    vmv.x.s a3, v8
+; ZVFH64-NEXT:    and a2, a3, a2
+; ZVFH64-NEXT:    slli a2, a2, 15
+; ZVFH64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFH64-NEXT:    vmv.x.s a3, v8
+; ZVFH64-NEXT:    slli a3, a3, 30
+; ZVFH64-NEXT:    or a1, a1, a3
+; ZVFH64-NEXT:    or a1, a1, a2
+; ZVFH64-NEXT:    sw a1, 0(a0)
+; ZVFH64-NEXT:    slli a1, a1, 19
+; ZVFH64-NEXT:    srli a1, a1, 51
+; ZVFH64-NEXT:    sh a1, 4(a0)
+; ZVFH64-NEXT:    ret
 ;
-; LMULMAX1RV32-LABEL: fp2si_v3f32_v3i15:
-; LMULMAX1RV32:       # %bb.0:
-; LMULMAX1RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; LMULMAX1RV32-NEXT:    vslidedown.vi v8, v9, 2
-; LMULMAX1RV32-NEXT:    vmv.x.s a1, v8
-; LMULMAX1RV32-NEXT:    slli a2, a1, 17
-; LMULMAX1RV32-NEXT:    srli a2, a2, 19
-; LMULMAX1RV32-NEXT:    sh a2, 4(a0)
-; LMULMAX1RV32-NEXT:    vmv.x.s a2, v9
-; LMULMAX1RV32-NEXT:    lui a3, 8
-; LMULMAX1RV32-NEXT:    addi a3, a3, -1
-; LMULMAX1RV32-NEXT:    and a2, a2, a3
-; LMULMAX1RV32-NEXT:    vslidedown.vi v8, v9, 1
-; LMULMAX1RV32-NEXT:    vmv.x.s a4, v8
-; LMULMAX1RV32-NEXT:    and a3, a4, a3
-; LMULMAX1RV32-NEXT:    slli a3, a3, 15
-; LMULMAX1RV32-NEXT:    slli a1, a1, 30
-; LMULMAX1RV32-NEXT:    or a1, a2, a1
-; LMULMAX1RV32-NEXT:    or a1, a1, a3
-; LMULMAX1RV32-NEXT:    sw a1, 0(a0)
-; LMULMAX1RV32-NEXT:    ret
+; ZVFHMIN32-LABEL: fp2si_v3f32_v3i15:
+; ZVFHMIN32:       # %bb.0:
+; ZVFHMIN32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN32-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; ZVFHMIN32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN32-NEXT:    vmv.x.s a1, v8
+; ZVFHMIN32-NEXT:    slli a2, a1, 17
+; ZVFHMIN32-NEXT:    srli a2, a2, 19
+; ZVFHMIN32-NEXT:    sh a2, 4(a0)
+; ZVFHMIN32-NEXT:    vmv.x.s a2, v9
+; ZVFHMIN32-NEXT:    lui a3, 8
+; ZVFHMIN32-NEXT:    addi a3, a3, -1
+; ZVFHMIN32-NEXT:    and a2, a2, a3
+; ZVFHMIN32-NEXT:    vslidedown.vi v8, v9, 1
+; ZVFHMIN32-NEXT:    vmv.x.s a4, v8
+; ZVFHMIN32-NEXT:    and a3, a4, a3
+; ZVFHMIN32-NEXT:    slli a3, a3, 15
+; ZVFHMIN32-NEXT:    slli a1, a1, 30
+; ZVFHMIN32-NEXT:    or a1, a2, a1
+; ZVFHMIN32-NEXT:    or a1, a1, a3
+; ZVFHMIN32-NEXT:    sw a1, 0(a0)
+; ZVFHMIN32-NEXT:    ret
 ;
-; LMULMAX1RV64-LABEL: fp2si_v3f32_v3i15:
-; LMULMAX1RV64:       # %bb.0:
-; LMULMAX1RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; LMULMAX1RV64-NEXT:    vmv.x.s a1, v9
-; LMULMAX1RV64-NEXT:    lui a2, 8
-; LMULMAX1RV64-NEXT:    addiw a2, a2, -1
-; LMULMAX1RV64-NEXT:    and a1, a1, a2
-; LMULMAX1RV64-NEXT:    vslidedown.vi v8, v9, 1
-; LMULMAX1RV64-NEXT:    vmv.x.s a3, v8
-; LMULMAX1RV64-NEXT:    and a2, a3, a2
-; LMULMAX1RV64-NEXT:    slli a2, a2, 15
-; LMULMAX1RV64-NEXT:    vslidedown.vi v8, v9, 2
-; LMULMAX1RV64-NEXT:    vmv.x.s a3, v8
-; LMULMAX1RV64-NEXT:    slli a3, a3, 30
-; LMULMAX1RV64-NEXT:    or a1, a1, a3
-; LMULMAX1RV64-NEXT:    or a1, a1, a2
-; LMULMAX1RV64-NEXT:    sw a1, 0(a0)
-; LMULMAX1RV64-NEXT:    slli a1, a1, 19
-; LMULMAX1RV64-NEXT:    srli a1, a1, 51
-; LMULMAX1RV64-NEXT:    sh a1, 4(a0)
-; LMULMAX1RV64-NEXT:    ret
+; ZVFHMIN64-LABEL: fp2si_v3f32_v3i15:
+; ZVFHMIN64:       # %bb.0:
+; ZVFHMIN64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN64-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; ZVFHMIN64-NEXT:    vmv.x.s a1, v9
+; ZVFHMIN64-NEXT:    lui a2, 8
+; ZVFHMIN64-NEXT:    addiw a2, a2, -1
+; ZVFHMIN64-NEXT:    and a1, a1, a2
+; ZVFHMIN64-NEXT:    vslidedown.vi v8, v9, 1
+; ZVFHMIN64-NEXT:    vmv.x.s a3, v8
+; ZVFHMIN64-NEXT:    and a2, a3, a2
+; ZVFHMIN64-NEXT:    slli a2, a2, 15
+; ZVFHMIN64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN64-NEXT:    vmv.x.s a3, v8
+; ZVFHMIN64-NEXT:    slli a3, a3, 30
+; ZVFHMIN64-NEXT:    or a1, a1, a3
+; ZVFHMIN64-NEXT:    or a1, a1, a2
+; ZVFHMIN64-NEXT:    sw a1, 0(a0)
+; ZVFHMIN64-NEXT:    slli a1, a1, 19
+; ZVFHMIN64-NEXT:    srli a1, a1, 51
+; ZVFHMIN64-NEXT:    sh a1, 4(a0)
+; ZVFHMIN64-NEXT:    ret
   %z = fptosi <3 x float> %x to <3 x i15>
   ret <3 x i15> %z
 }
 
 ; FIXME: This is expanded when they could be widened + promoted
 define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
-; LMULMAX8RV32-LABEL: fp2ui_v3f32_v3i15:
-; LMULMAX8RV32:       # %bb.0:
-; LMULMAX8RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX8RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; LMULMAX8RV32-NEXT:    vslidedown.vi v8, v9, 2
-; LMULMAX8RV32-NEXT:    vmv.x.s a1, v8
-; LMULMAX8RV32-NEXT:    slli a2, a1, 17
-; LMULMAX8RV32-NEXT:    srli a2, a2, 19
-; LMULMAX8RV32-NEXT:    sh a2, 4(a0)
-; LMULMAX8RV32-NEXT:    vmv.x.s a2, v9
-; LMULMAX8RV32-NEXT:    lui a3, 16
-; LMULMAX8RV32-NEXT:    addi a3, a3, -1
-; LMULMAX8RV32-NEXT:    and a2, a2, a3
-; LMULMAX8RV32-NEXT:    vslidedown.vi v8, v9, 1
-; LMULMAX8RV32-NEXT:    vmv.x.s a4, v8
-; LMULMAX8RV32-NEXT:    and a3, a4, a3
-; LMULMAX8RV32-NEXT:    slli a3, a3, 15
-; LMULMAX8RV32-NEXT:    slli a1, a1, 30
-; LMULMAX8RV32-NEXT:    or a1, a2, a1
-; LMULMAX8RV32-NEXT:    or a1, a1, a3
-; LMULMAX8RV32-NEXT:    sw a1, 0(a0)
-; LMULMAX8RV32-NEXT:    ret
+; ZVFH32-LABEL: fp2ui_v3f32_v3i15:
+; ZVFH32:       # %bb.0:
+; ZVFH32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH32-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; ZVFH32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFH32-NEXT:    vmv.x.s a1, v8
+; ZVFH32-NEXT:    slli a2, a1, 17
+; ZVFH32-NEXT:    srli a2, a2, 19
+; ZVFH32-NEXT:    sh a2, 4(a0)
+; ZVFH32-NEXT:    vmv.x.s a2, v9
+; ZVFH32-NEXT:    lui a3, 16
+; ZVFH32-NEXT:    addi a3, a3, -1
+; ZVFH32-NEXT:    and a2, a2, a3
+; ZVFH32-NEXT:    vslidedown.vi v8, v9, 1
+; ZVFH32-NEXT:    vmv.x.s a4, v8
+; ZVFH32-NEXT:    and a3, a4, a3
+; ZVFH32-NEXT:    slli a3, a3, 15
+; ZVFH32-NEXT:    slli a1, a1, 30
+; ZVFH32-NEXT:    or a1, a2, a1
+; ZVFH32-NEXT:    or a1, a1, a3
+; ZVFH32-NEXT:    sw a1, 0(a0)
+; ZVFH32-NEXT:    ret
 ;
-; LMULMAX8RV64-LABEL: fp2ui_v3f32_v3i15:
-; LMULMAX8RV64:       # %bb.0:
-; LMULMAX8RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX8RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; LMULMAX8RV64-NEXT:    vmv.x.s a1, v9
-; LMULMAX8RV64-NEXT:    lui a2, 16
-; LMULMAX8RV64-NEXT:    addiw a2, a2, -1
-; LMULMAX8RV64-NEXT:    and a1, a1, a2
-; LMULMAX8RV64-NEXT:    vslidedown.vi v8, v9, 1
-; LMULMAX8RV64-NEXT:    vmv.x.s a3, v8
-; LMULMAX8RV64-NEXT:    and a2, a3, a2
-; LMULMAX8RV64-NEXT:    slli a2, a2, 15
-; LMULMAX8RV64-NEXT:    vslidedown.vi v8, v9, 2
-; LMULMAX8RV64-NEXT:    vmv.x.s a3, v8
-; LMULMAX8RV64-NEXT:    slli a3, a3, 30
-; LMULMAX8RV64-NEXT:    or a1, a1, a3
-; LMULMAX8RV64-NEXT:    or a1, a1, a2
-; LMULMAX8RV64-NEXT:    sw a1, 0(a0)
-; LMULMAX8RV64-NEXT:    slli a1, a1, 19
-; LMULMAX8RV64-NEXT:    srli a1, a1, 51
-; LMULMAX8RV64-NEXT:    sh a1, 4(a0)
-; LMULMAX8RV64-NEXT:    ret
+; ZVFH64-LABEL: fp2ui_v3f32_v3i15:
+; ZVFH64:       # %bb.0:
+; ZVFH64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH64-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; ZVFH64-NEXT:    vmv.x.s a1, v9
+; ZVFH64-NEXT:    lui a2, 16
+; ZVFH64-NEXT:    addiw a2, a2, -1
+; ZVFH64-NEXT:    and a1, a1, a2
+; ZVFH64-NEXT:    vslidedown.vi v8, v9, 1
+; ZVFH64-NEXT:    vmv.x.s a3, v8
+; ZVFH64-NEXT:    and a2, a3, a2
+; ZVFH64-NEXT:    slli a2, a2, 15
+; ZVFH64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFH64-NEXT:    vmv.x.s a3, v8
+; ZVFH64-NEXT:    slli a3, a3, 30
+; ZVFH64-NEXT:    or a1, a1, a3
+; ZVFH64-NEXT:    or a1, a1, a2
+; ZVFH64-NEXT:    sw a1, 0(a0)
+; ZVFH64-NEXT:    slli a1, a1, 19
+; ZVFH64-NEXT:    srli a1, a1, 51
+; ZVFH64-NEXT:    sh a1, 4(a0)
+; ZVFH64-NEXT:    ret
 ;
-; LMULMAX1RV32-LABEL: fp2ui_v3f32_v3i15:
-; LMULMAX1RV32:       # %bb.0:
-; LMULMAX1RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; LMULMAX1RV32-NEXT:    vslidedown.vi v8, v9, 2
-; LMULMAX1RV32-NEXT:    vmv.x.s a1, v8
-; LMULMAX1RV32-NEXT:    slli a2, a1, 17
-; LMULMAX1RV32-NEXT:    srli a2, a2, 19
-; LMULMAX1RV32-NEXT:    sh a2, 4(a0)
-; LMULMAX1RV32-NEXT:    vmv.x.s a2, v9
-; LMULMAX1RV32-NEXT:    lui a3, 16
-; LMULMAX1RV32-NEXT:    addi a3, a3, -1
-; LMULMAX1RV32-NEXT:    and a2, a2, a3
-; LMULMAX1RV32-NEXT:    vslidedown.vi v8, v9, 1
-; LMULMAX1RV32-NEXT:    vmv.x.s a4, v8
-; LMULMAX1RV32-NEXT:    and a3, a4, a3
-; LMULMAX1RV32-NEXT:    slli a3, a3, 15
-; LMULMAX1RV32-NEXT:    slli a1, a1, 30
-; LMULMAX1RV32-NEXT:    or a1, a2, a1
-; LMULMAX1RV32-NEXT:    or a1, a1, a3
-; LMULMAX1RV32-NEXT:    sw a1, 0(a0)
-; LMULMAX1RV32-NEXT:    ret
+; ZVFHMIN32-LABEL: fp2ui_v3f32_v3i15:
+; ZVFHMIN32:       # %bb.0:
+; ZVFHMIN32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN32-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; ZVFHMIN32-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN32-NEXT:    vmv.x.s a1, v8
+; ZVFHMIN32-NEXT:    slli a2, a1, 17
+; ZVFHMIN32-NEXT:    srli a2, a2, 19
+; ZVFHMIN32-NEXT:    sh a2, 4(a0)
+; ZVFHMIN32-NEXT:    vmv.x.s a2, v9
+; ZVFHMIN32-NEXT:    lui a3, 16
+; ZVFHMIN32-NEXT:    addi a3, a3, -1
+; ZVFHMIN32-NEXT:    and a2, a2, a3
+; ZVFHMIN32-NEXT:    vslidedown.vi v8, v9, 1
+; ZVFHMIN32-NEXT:    vmv.x.s a4, v8
+; ZVFHMIN32-NEXT:    and a3, a4, a3
+; ZVFHMIN32-NEXT:    slli a3, a3, 15
+; ZVFHMIN32-NEXT:    slli a1, a1, 30
+; ZVFHMIN32-NEXT:    or a1, a2, a1
+; ZVFHMIN32-NEXT:    or a1, a1, a3
+; ZVFHMIN32-NEXT:    sw a1, 0(a0)
+; ZVFHMIN32-NEXT:    ret
 ;
-; LMULMAX1RV64-LABEL: fp2ui_v3f32_v3i15:
-; LMULMAX1RV64:       # %bb.0:
-; LMULMAX1RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; LMULMAX1RV64-NEXT:    vmv.x.s a1, v9
-; LMULMAX1RV64-NEXT:    lui a2, 16
-; LMULMAX1RV64-NEXT:    addiw a2, a2, -1
-; LMULMAX1RV64-NEXT:    and a1, a1, a2
-; LMULMAX1RV64-NEXT:    vslidedown.vi v8, v9, 1
-; LMULMAX1RV64-NEXT:    vmv.x.s a3, v8
-; LMULMAX1RV64-NEXT:    and a2, a3, a2
-; LMULMAX1RV64-NEXT:    slli a2, a2, 15
-; LMULMAX1RV64-NEXT:    vslidedown.vi v8, v9, 2
-; LMULMAX1RV64-NEXT:    vmv.x.s a3, v8
-; LMULMAX1RV64-NEXT:    slli a3, a3, 30
-; LMULMAX1RV64-NEXT:    or a1, a1, a3
-; LMULMAX1RV64-NEXT:    or a1, a1, a2
-; LMULMAX1RV64-NEXT:    sw a1, 0(a0)
-; LMULMAX1RV64-NEXT:    slli a1, a1, 19
-; LMULMAX1RV64-NEXT:    srli a1, a1, 51
-; LMULMAX1RV64-NEXT:    sh a1, 4(a0)
-; LMULMAX1RV64-NEXT:    ret
+; ZVFHMIN64-LABEL: fp2ui_v3f32_v3i15:
+; ZVFHMIN64:       # %bb.0:
+; ZVFHMIN64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN64-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; ZVFHMIN64-NEXT:    vmv.x.s a1, v9
+; ZVFHMIN64-NEXT:    lui a2, 16
+; ZVFHMIN64-NEXT:    addiw a2, a2, -1
+; ZVFHMIN64-NEXT:    and a1, a1, a2
+; ZVFHMIN64-NEXT:    vslidedown.vi v8, v9, 1
+; ZVFHMIN64-NEXT:    vmv.x.s a3, v8
+; ZVFHMIN64-NEXT:    and a2, a3, a2
+; ZVFHMIN64-NEXT:    slli a2, a2, 15
+; ZVFHMIN64-NEXT:    vslidedown.vi v8, v9, 2
+; ZVFHMIN64-NEXT:    vmv.x.s a3, v8
+; ZVFHMIN64-NEXT:    slli a3, a3, 30
+; ZVFHMIN64-NEXT:    or a1, a1, a3
+; ZVFHMIN64-NEXT:    or a1, a1, a2
+; ZVFHMIN64-NEXT:    sw a1, 0(a0)
+; ZVFHMIN64-NEXT:    slli a1, a1, 19
+; ZVFHMIN64-NEXT:    srli a1, a1, 51
+; ZVFHMIN64-NEXT:    sh a1, 4(a0)
+; ZVFHMIN64-NEXT:    ret
   %z = fptoui <3 x float> %x to <3 x i15>
   ret <3 x i15> %z
 }
@@ -333,26 +329,13 @@ define <3 x i1> @fp2ui_v3f32_v3i1(<3 x float> %x) {
 }
 
 define void @fp2si_v8f32_v8i32(ptr %x, ptr %y) {
-; LMULMAX8-LABEL: fp2si_v8f32_v8i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; LMULMAX8-NEXT:    vse32.v v8, (a1)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: fp2si_v8f32_v8i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    addi a2, a0, 16
-; LMULMAX1-NEXT:    vle32.v v8, (a2)
-; LMULMAX1-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; LMULMAX1-NEXT:    vfcvt.rtz.x.f.v v9, v9
-; LMULMAX1-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-NEXT:    addi a1, a1, 16
-; LMULMAX1-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: fp2si_v8f32_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
+; CHECK-NEXT:    vse32.v v8, (a1)
+; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %d = fptosi <8 x float> %a to <8 x i32>
   store <8 x i32> %d, ptr %y
@@ -360,26 +343,13 @@ define void @fp2si_v8f32_v8i32(ptr %x, ptr %y) {
 }
 
 define void @fp2ui_v8f32_v8i32(ptr %x, ptr %y) {
-; LMULMAX8-LABEL: fp2ui_v8f32_v8i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; LMULMAX8-NEXT:    vse32.v v8, (a1)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: fp2ui_v8f32_v8i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    addi a2, a0, 16
-; LMULMAX1-NEXT:    vle32.v v8, (a2)
-; LMULMAX1-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; LMULMAX1-NEXT:    vfcvt.rtz.xu.f.v v9, v9
-; LMULMAX1-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-NEXT:    addi a1, a1, 16
-; LMULMAX1-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: fp2ui_v8f32_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
+; CHECK-NEXT:    vse32.v v8, (a1)
+; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %d = fptoui <8 x float> %a to <8 x i32>
   store <8 x i32> %d, ptr %y
@@ -387,67 +357,25 @@ define void @fp2ui_v8f32_v8i32(ptr %x, ptr %y) {
 }
 
 define <8 x i1> @fp2si_v8f32_v8i1(<8 x float> %x) {
-; LMULMAX8-LABEL: fp2si_v8f32_v8i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX8-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; LMULMAX8-NEXT:    vand.vi v8, v10, 1
-; LMULMAX8-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: fp2si_v8f32_v8i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; LMULMAX1-NEXT:    vand.vi v8, v10, 1
-; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rtz.x.f.w v10, v9
-; LMULMAX1-NEXT:    vand.vi v9, v10, 1
-; LMULMAX1-NEXT:    vmsne.vi v0, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-NEXT:    vmerge.vim v9, v9, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
-; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: fp2si_v8f32_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
   %z = fptosi <8 x float> %x to <8 x i1>
   ret <8 x i1> %z
 }
 
 define <8 x i1> @fp2ui_v8f32_v8i1(<8 x float> %x) {
-; LMULMAX8-LABEL: fp2ui_v8f32_v8i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX8-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; LMULMAX8-NEXT:    vand.vi v8, v10, 1
-; LMULMAX8-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: fp2ui_v8f32_v8i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; LMULMAX1-NEXT:    vand.vi v8, v10, 1
-; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rtz.xu.f.w v10, v9
-; LMULMAX1-NEXT:    vand.vi v9, v10, 1
-; LMULMAX1-NEXT:    vmsne.vi v0, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-NEXT:    vmerge.vim v9, v9, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
-; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: fp2ui_v8f32_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
+; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
   %z = fptoui <8 x float> %x to <8 x i1>
   ret <8 x i1> %z
 }
@@ -481,39 +409,13 @@ define void @fp2ui_v2f32_v2i64(ptr %x, ptr %y) {
 }
 
 define void @fp2si_v8f32_v8i64(ptr %x, ptr %y) {
-; LMULMAX8-LABEL: fp2si_v8f32_v8i64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    vfwcvt.rtz.x.f.v v12, v8
-; LMULMAX8-NEXT:    vse64.v v12, (a1)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: fp2si_v8f32_v8i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    addi a2, a0, 16
-; LMULMAX1-NEXT:    vle32.v v8, (a2)
-; LMULMAX1-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfwcvt.rtz.x.f.v v10, v8
-; LMULMAX1-NEXT:    vfwcvt.rtz.x.f.v v11, v9
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfwcvt.rtz.x.f.v v12, v8
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v9, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfwcvt.rtz.x.f.v v9, v8
-; LMULMAX1-NEXT:    addi a0, a1, 16
-; LMULMAX1-NEXT:    vse64.v v9, (a0)
-; LMULMAX1-NEXT:    addi a0, a1, 48
-; LMULMAX1-NEXT:    vse64.v v12, (a0)
-; LMULMAX1-NEXT:    vse64.v v11, (a1)
-; LMULMAX1-NEXT:    addi a0, a1, 32
-; LMULMAX1-NEXT:    vse64.v v10, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: fp2si_v8f32_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v12, v8
+; CHECK-NEXT:    vse64.v v12, (a1)
+; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %d = fptosi <8 x float> %a to <8 x i64>
   store <8 x i64> %d, ptr %y
@@ -521,39 +423,13 @@ define void @fp2si_v8f32_v8i64(ptr %x, ptr %y) {
 }
 
 define void @fp2ui_v8f32_v8i64(ptr %x, ptr %y) {
-; LMULMAX8-LABEL: fp2ui_v8f32_v8i64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    vfwcvt.rtz.xu.f.v v12, v8
-; LMULMAX8-NEXT:    vse64.v v12, (a1)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: fp2ui_v8f32_v8i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    addi a2, a0, 16
-; LMULMAX1-NEXT:    vle32.v v8, (a2)
-; LMULMAX1-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfwcvt.rtz.xu.f.v v10, v8
-; LMULMAX1-NEXT:    vfwcvt.rtz.xu.f.v v11, v9
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfwcvt.rtz.xu.f.v v12, v8
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v9, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfwcvt.rtz.xu.f.v v9, v8
-; LMULMAX1-NEXT:    addi a0, a1, 16
-; LMULMAX1-NEXT:    vse64.v v9, (a0)
-; LMULMAX1-NEXT:    addi a0, a1, 48
-; LMULMAX1-NEXT:    vse64.v v12, (a0)
-; LMULMAX1-NEXT:    vse64.v v11, (a1)
-; LMULMAX1-NEXT:    addi a0, a1, 32
-; LMULMAX1-NEXT:    vse64.v v10, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: fp2ui_v8f32_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v12, v8
+; CHECK-NEXT:    vse64.v v12, (a1)
+; CHECK-NEXT:    ret
   %a = load <8 x float>, ptr %x
   %d = fptoui <8 x float> %a to <8 x i64>
   store <8 x i64> %d, ptr %y
@@ -593,145 +469,43 @@ define void @fp2ui_v2f16_v2i64(ptr %x, ptr %y) {
 }
 
 define <2 x i1> @fp2si_v2f16_v2i1(<2 x half> %x) {
-; LMULMAX8RV32ZVFH-LABEL: fp2si_v2f16_v2i1:
-; LMULMAX8RV32ZVFH:       # %bb.0:
-; LMULMAX8RV32ZVFH-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; LMULMAX8RV32ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; LMULMAX8RV32ZVFH-NEXT:    vand.vi v8, v9, 1
-; LMULMAX8RV32ZVFH-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX8RV32ZVFH-NEXT:    ret
-;
-; LMULMAX8RV64ZVFH-LABEL: fp2si_v2f16_v2i1:
-; LMULMAX8RV64ZVFH:       # %bb.0:
-; LMULMAX8RV64ZVFH-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; LMULMAX8RV64ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; LMULMAX8RV64ZVFH-NEXT:    vand.vi v8, v9, 1
-; LMULMAX8RV64ZVFH-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX8RV64ZVFH-NEXT:    ret
-;
-; LMULMAX1RV32ZVFH-LABEL: fp2si_v2f16_v2i1:
-; LMULMAX1RV32ZVFH:       # %bb.0:
-; LMULMAX1RV32ZVFH-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; LMULMAX1RV32ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; LMULMAX1RV32ZVFH-NEXT:    vand.vi v8, v9, 1
-; LMULMAX1RV32ZVFH-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1RV32ZVFH-NEXT:    ret
+; ZVFH-LABEL: fp2si_v2f16_v2i1:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
+; ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; ZVFH-NEXT:    vand.vi v8, v9, 1
+; ZVFH-NEXT:    vmsne.vi v0, v8, 0
+; ZVFH-NEXT:    ret
 ;
-; LMULMAX1RV64ZVFH-LABEL: fp2si_v2f16_v2i1:
-; LMULMAX1RV64ZVFH:       # %bb.0:
-; LMULMAX1RV64ZVFH-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; LMULMAX1RV64ZVFH-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; LMULMAX1RV64ZVFH-NEXT:    vand.vi v8, v9, 1
-; LMULMAX1RV64ZVFH-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1RV64ZVFH-NEXT:    ret
-;
-; LMULMAX8RV32ZVFHMIN-LABEL: fp2si_v2f16_v2i1:
-; LMULMAX8RV32ZVFHMIN:       # %bb.0:
-; LMULMAX8RV32ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; LMULMAX8RV32ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; LMULMAX8RV32ZVFHMIN-NEXT:    vfncvt.rtz.x.f.w v8, v9
-; LMULMAX8RV32ZVFHMIN-NEXT:    vand.vi v8, v8, 1
-; LMULMAX8RV32ZVFHMIN-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX8RV32ZVFHMIN-NEXT:    ret
-;
-; LMULMAX8RV64ZVFHMIN-LABEL: fp2si_v2f16_v2i1:
-; LMULMAX8RV64ZVFHMIN:       # %bb.0:
-; LMULMAX8RV64ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; LMULMAX8RV64ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; LMULMAX8RV64ZVFHMIN-NEXT:    vfncvt.rtz.x.f.w v8, v9
-; LMULMAX8RV64ZVFHMIN-NEXT:    vand.vi v8, v8, 1
-; LMULMAX8RV64ZVFHMIN-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX8RV64ZVFHMIN-NEXT:    ret
-;
-; LMULMAX1RV32ZVFHMIN-LABEL: fp2si_v2f16_v2i1:
-; LMULMAX1RV32ZVFHMIN:       # %bb.0:
-; LMULMAX1RV32ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; LMULMAX1RV32ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; LMULMAX1RV32ZVFHMIN-NEXT:    vfncvt.rtz.x.f.w v8, v9
-; LMULMAX1RV32ZVFHMIN-NEXT:    vand.vi v8, v8, 1
-; LMULMAX1RV32ZVFHMIN-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1RV32ZVFHMIN-NEXT:    ret
-;
-; LMULMAX1RV64ZVFHMIN-LABEL: fp2si_v2f16_v2i1:
-; LMULMAX1RV64ZVFHMIN:       # %bb.0:
-; LMULMAX1RV64ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; LMULMAX1RV64ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; LMULMAX1RV64ZVFHMIN-NEXT:    vfncvt.rtz.x.f.w v8, v9
-; LMULMAX1RV64ZVFHMIN-NEXT:    vand.vi v8, v8, 1
-; LMULMAX1RV64ZVFHMIN-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1RV64ZVFHMIN-NEXT:    ret
+; ZVFHMIN-LABEL: fp2si_v2f16_v2i1:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vfncvt.rtz.x.f.w v8, v9
+; ZVFHMIN-NEXT:    vand.vi v8, v8, 1
+; ZVFHMIN-NEXT:    vmsne.vi v0, v8, 0
+; ZVFHMIN-NEXT:    ret
   %z = fptosi <2 x half> %x to <2 x i1>
   ret <2 x i1> %z
 }
 
 define <2 x i1> @fp2ui_v2f16_v2i1(<2 x half> %x) {
-; LMULMAX8RV32ZVFH-LABEL: fp2ui_v2f16_v2i1:
-; LMULMAX8RV32ZVFH:       # %bb.0:
-; LMULMAX8RV32ZVFH-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; LMULMAX8RV32ZVFH-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; LMULMAX8RV32ZVFH-NEXT:    vand.vi v8, v9, 1
-; LMULMAX8RV32ZVFH-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX8RV32ZVFH-NEXT:    ret
-;
-; LMULMAX8RV64ZVFH-LABEL: fp2ui_v2f16_v2i1:
-; LMULMAX8RV64ZVFH:       # %bb.0:
-; LMULMAX8RV64ZVFH-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; LMULMAX8RV64ZVFH-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; LMULMAX8RV64ZVFH-NEXT:    vand.vi v8, v9, 1
-; LMULMAX8RV64ZVFH-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX8RV64ZVFH-NEXT:    ret
-;
-; LMULMAX1RV32ZVFH-LABEL: fp2ui_v2f16_v2i1:
-; LMULMAX1RV32ZVFH:       # %bb.0:
-; LMULMAX1RV32ZVFH-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; LMULMAX1RV32ZVFH-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; LMULMAX1RV32ZVFH-NEXT:    vand.vi v8, v9, 1
-; LMULMAX1RV32ZVFH-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1RV32ZVFH-NEXT:    ret
-;
-; LMULMAX1RV64ZVFH-LABEL: fp2ui_v2f16_v2i1:
-; LMULMAX1RV64ZVFH:       # %bb.0:
-; LMULMAX1RV64ZVFH-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; LMULMAX1RV64ZVFH-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; LMULMAX1RV64ZVFH-NEXT:    vand.vi v8, v9, 1
-; LMULMAX1RV64ZVFH-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1RV64ZVFH-NEXT:    ret
-;
-; LMULMAX8RV32ZVFHMIN-LABEL: fp2ui_v2f16_v2i1:
-; LMULMAX8RV32ZVFHMIN:       # %bb.0:
-; LMULMAX8RV32ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; LMULMAX8RV32ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; LMULMAX8RV32ZVFHMIN-NEXT:    vfncvt.rtz.xu.f.w v8, v9
-; LMULMAX8RV32ZVFHMIN-NEXT:    vand.vi v8, v8, 1
-; LMULMAX8RV32ZVFHMIN-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX8RV32ZVFHMIN-NEXT:    ret
+; ZVFH-LABEL: fp2ui_v2f16_v2i1:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
+; ZVFH-NEXT:    vfncvt.rtz.xu.f.w v9, v8
+; ZVFH-NEXT:    vand.vi v8, v9, 1
+; ZVFH-NEXT:    vmsne.vi v0, v8, 0
+; ZVFH-NEXT:    ret
 ;
-; LMULMAX8RV64ZVFHMIN-LABEL: fp2ui_v2f16_v2i1:
-; LMULMAX8RV64ZVFHMIN:       # %bb.0:
-; LMULMAX8RV64ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; LMULMAX8RV64ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; LMULMAX8RV64ZVFHMIN-NEXT:    vfncvt.rtz.xu.f.w v8, v9
-; LMULMAX8RV64ZVFHMIN-NEXT:    vand.vi v8, v8, 1
-; LMULMAX8RV64ZVFHMIN-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX8RV64ZVFHMIN-NEXT:    ret
-;
-; LMULMAX1RV32ZVFHMIN-LABEL: fp2ui_v2f16_v2i1:
-; LMULMAX1RV32ZVFHMIN:       # %bb.0:
-; LMULMAX1RV32ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; LMULMAX1RV32ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; LMULMAX1RV32ZVFHMIN-NEXT:    vfncvt.rtz.xu.f.w v8, v9
-; LMULMAX1RV32ZVFHMIN-NEXT:    vand.vi v8, v8, 1
-; LMULMAX1RV32ZVFHMIN-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1RV32ZVFHMIN-NEXT:    ret
-;
-; LMULMAX1RV64ZVFHMIN-LABEL: fp2ui_v2f16_v2i1:
-; LMULMAX1RV64ZVFHMIN:       # %bb.0:
-; LMULMAX1RV64ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; LMULMAX1RV64ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; LMULMAX1RV64ZVFHMIN-NEXT:    vfncvt.rtz.xu.f.w v8, v9
-; LMULMAX1RV64ZVFHMIN-NEXT:    vand.vi v8, v8, 1
-; LMULMAX1RV64ZVFHMIN-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1RV64ZVFHMIN-NEXT:    ret
+; ZVFHMIN-LABEL: fp2ui_v2f16_v2i1:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vfncvt.rtz.xu.f.w v8, v9
+; ZVFHMIN-NEXT:    vand.vi v8, v8, 1
+; ZVFHMIN-NEXT:    vmsne.vi v0, v8, 0
+; ZVFHMIN-NEXT:    ret
   %z = fptoui <2 x half> %x to <2 x i1>
   ret <2 x i1> %z
 }
@@ -797,59 +571,17 @@ define <2 x i1> @fp2ui_v2f64_v2i1(<2 x double> %x) {
 }
 
 define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
-; LMULMAX8-LABEL: fp2si_v8f64_v8i8:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vle64.v v8, (a0)
-; LMULMAX8-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; LMULMAX8-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; LMULMAX8-NEXT:    vnsrl.wi v8, v12, 0
-; LMULMAX8-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; LMULMAX8-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX8-NEXT:    vse8.v v8, (a1)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: fp2si_v8f64_v8i8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a2, a0, 48
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vle64.v v8, (a2)
-; LMULMAX1-NEXT:    addi a2, a0, 32
-; LMULMAX1-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle64.v v11, (a0)
-; LMULMAX1-NEXT:    vfncvt.rtz.x.f.w v12, v9
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v12, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rtz.x.f.w v12, v11
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v11, v12, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v11, v11, 0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v9, v11, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rtz.x.f.w v11, v10
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v10, v11, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v10, v10, 0
-; LMULMAX1-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v9, v10, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v10, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v9, v8, 6
-; LMULMAX1-NEXT:    vse8.v v9, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: fp2si_v8f64_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse8.v v8, (a1)
+; CHECK-NEXT:    ret
   %a = load <8 x double>, ptr %x
   %d = fptosi <8 x double> %a to <8 x i8>
   store <8 x i8> %d, ptr %y
@@ -857,59 +589,17 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
 }
 
 define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
-; LMULMAX8-LABEL: fp2ui_v8f64_v8i8:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vle64.v v8, (a0)
-; LMULMAX8-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; LMULMAX8-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; LMULMAX8-NEXT:    vnsrl.wi v8, v12, 0
-; LMULMAX8-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; LMULMAX8-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX8-NEXT:    vse8.v v8, (a1)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: fp2ui_v8f64_v8i8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a2, a0, 48
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vle64.v v8, (a2)
-; LMULMAX1-NEXT:    addi a2, a0, 32
-; LMULMAX1-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle64.v v11, (a0)
-; LMULMAX1-NEXT:    vfncvt.rtz.xu.f.w v12, v9
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v12, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rtz.xu.f.w v12, v11
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v11, v12, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v11, v11, 0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v9, v11, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rtz.xu.f.w v11, v10
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v10, v11, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v10, v10, 0
-; LMULMAX1-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v9, v10, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v10, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v9, v8, 6
-; LMULMAX1-NEXT:    vse8.v v9, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: fp2ui_v8f64_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse8.v v8, (a1)
+; CHECK-NEXT:    ret
   %a = load <8 x double>, ptr %x
   %d = fptoui <8 x double> %a to <8 x i8>
   store <8 x i8> %d, ptr %y
@@ -917,111 +607,25 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
 }
 
 define <8 x i1> @fp2si_v8f64_v8i1(<8 x double> %x) {
-; LMULMAX8-LABEL: fp2si_v8f64_v8i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; LMULMAX8-NEXT:    vand.vi v8, v12, 1
-; LMULMAX8-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: fp2si_v8f64_v8i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; LMULMAX1-NEXT:    vand.vi v8, v12, 1
-; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vmerge.vim v12, v8, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rtz.x.f.w v13, v9
-; LMULMAX1-NEXT:    vand.vi v9, v13, 1
-; LMULMAX1-NEXT:    vmsne.vi v0, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-NEXT:    vmerge.vim v13, v9, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v12, v13, 2
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmsne.vi v0, v12, 0
-; LMULMAX1-NEXT:    vmerge.vim v12, v8, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rtz.x.f.w v13, v10
-; LMULMAX1-NEXT:    vand.vi v10, v13, 1
-; LMULMAX1-NEXT:    vmsne.vi v0, v10, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vmerge.vim v10, v9, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v12, v10, 4
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmsne.vi v0, v12, 0
-; LMULMAX1-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rtz.x.f.w v10, v11
-; LMULMAX1-NEXT:    vand.vi v10, v10, 1
-; LMULMAX1-NEXT:    vmsne.vi v0, v10, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vmerge.vim v9, v9, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 6
-; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: fp2si_v8f64_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
+; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
   %z = fptosi <8 x double> %x to <8 x i1>
   ret <8 x i1> %z
 }
 
 define <8 x i1> @fp2ui_v8f64_v8i1(<8 x double> %x) {
-; LMULMAX8-LABEL: fp2ui_v8f64_v8i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; LMULMAX8-NEXT:    vand.vi v8, v12, 1
-; LMULMAX8-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: fp2ui_v8f64_v8i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; LMULMAX1-NEXT:    vand.vi v8, v12, 1
-; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vmerge.vim v12, v8, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rtz.xu.f.w v13, v9
-; LMULMAX1-NEXT:    vand.vi v9, v13, 1
-; LMULMAX1-NEXT:    vmsne.vi v0, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-NEXT:    vmerge.vim v13, v9, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v12, v13, 2
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmsne.vi v0, v12, 0
-; LMULMAX1-NEXT:    vmerge.vim v12, v8, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rtz.xu.f.w v13, v10
-; LMULMAX1-NEXT:    vand.vi v10, v13, 1
-; LMULMAX1-NEXT:    vmsne.vi v0, v10, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vmerge.vim v10, v9, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v12, v10, 4
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmsne.vi v0, v12, 0
-; LMULMAX1-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.rtz.xu.f.w v10, v11
-; LMULMAX1-NEXT:    vand.vi v10, v10, 1
-; LMULMAX1-NEXT:    vmsne.vi v0, v10, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vmerge.vim v9, v9, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 6
-; LMULMAX1-NEXT:    vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: fp2ui_v8f64_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
+; CHECK-NEXT:    vand.vi v8, v12, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
   %z = fptoui <8 x double> %x to <8 x i1>
   ret <8 x i1> %z
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
index ec11ada12eaa76..6ffa6ac250ed7f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
@@ -1,10 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8ZVFH,LMULMAX8RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8ZVFH,LMULMAX8RV64
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1RV64
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8ZVFHMIN,LMULMAX8RV32ZVFHMIN
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8ZVFHMIN,LMULMAX8RV64ZVFHMIN
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN64
 
 define void @si2fp_v2i32_v2f32(ptr %x, ptr %y) {
 ; CHECK-LABEL: si2fp_v2i32_v2f32:
@@ -132,214 +130,146 @@ define <3 x float> @si2fp_v3i1_v3f32(<3 x i1> %x) {
 
 ; FIXME: This gets expanded instead of widened + promoted
 define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
-; LMULMAX8RV32-LABEL: si2fp_v3i7_v3f32:
-; LMULMAX8RV32:       # %bb.0:
-; LMULMAX8RV32-NEXT:    lw a1, 4(a0)
-; LMULMAX8RV32-NEXT:    lw a2, 0(a0)
-; LMULMAX8RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV32-NEXT:    lw a0, 8(a0)
-; LMULMAX8RV32-NEXT:    vmv.v.x v8, a2
-; LMULMAX8RV32-NEXT:    vslide1down.vx v8, v8, a1
-; LMULMAX8RV32-NEXT:    vslide1down.vx v8, v8, a0
-; LMULMAX8RV32-NEXT:    vslidedown.vi v8, v8, 1
-; LMULMAX8RV32-NEXT:    vadd.vv v8, v8, v8
-; LMULMAX8RV32-NEXT:    vsra.vi v8, v8, 1
-; LMULMAX8RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; LMULMAX8RV32-NEXT:    vsext.vf2 v9, v8
-; LMULMAX8RV32-NEXT:    vfwcvt.f.x.v v8, v9
-; LMULMAX8RV32-NEXT:    ret
+; ZVFH32-LABEL: si2fp_v3i7_v3f32:
+; ZVFH32:       # %bb.0:
+; ZVFH32-NEXT:    lw a1, 4(a0)
+; ZVFH32-NEXT:    lw a2, 0(a0)
+; ZVFH32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFH32-NEXT:    lw a0, 8(a0)
+; ZVFH32-NEXT:    vmv.v.x v8, a2
+; ZVFH32-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFH32-NEXT:    vslide1down.vx v8, v8, a0
+; ZVFH32-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFH32-NEXT:    vadd.vv v8, v8, v8
+; ZVFH32-NEXT:    vsra.vi v8, v8, 1
+; ZVFH32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH32-NEXT:    vsext.vf2 v9, v8
+; ZVFH32-NEXT:    vfwcvt.f.x.v v8, v9
+; ZVFH32-NEXT:    ret
 ;
-; LMULMAX8RV64-LABEL: si2fp_v3i7_v3f32:
-; LMULMAX8RV64:       # %bb.0:
-; LMULMAX8RV64-NEXT:    ld a1, 8(a0)
-; LMULMAX8RV64-NEXT:    ld a2, 0(a0)
-; LMULMAX8RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV64-NEXT:    ld a0, 16(a0)
-; LMULMAX8RV64-NEXT:    vmv.v.x v8, a2
-; LMULMAX8RV64-NEXT:    vslide1down.vx v8, v8, a1
-; LMULMAX8RV64-NEXT:    vslide1down.vx v8, v8, a0
-; LMULMAX8RV64-NEXT:    vslidedown.vi v8, v8, 1
-; LMULMAX8RV64-NEXT:    vadd.vv v8, v8, v8
-; LMULMAX8RV64-NEXT:    vsra.vi v8, v8, 1
-; LMULMAX8RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; LMULMAX8RV64-NEXT:    vsext.vf2 v9, v8
-; LMULMAX8RV64-NEXT:    vfwcvt.f.x.v v8, v9
-; LMULMAX8RV64-NEXT:    ret
+; ZVFH64-LABEL: si2fp_v3i7_v3f32:
+; ZVFH64:       # %bb.0:
+; ZVFH64-NEXT:    ld a1, 8(a0)
+; ZVFH64-NEXT:    ld a2, 0(a0)
+; ZVFH64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFH64-NEXT:    ld a0, 16(a0)
+; ZVFH64-NEXT:    vmv.v.x v8, a2
+; ZVFH64-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFH64-NEXT:    vslide1down.vx v8, v8, a0
+; ZVFH64-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFH64-NEXT:    vadd.vv v8, v8, v8
+; ZVFH64-NEXT:    vsra.vi v8, v8, 1
+; ZVFH64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH64-NEXT:    vsext.vf2 v9, v8
+; ZVFH64-NEXT:    vfwcvt.f.x.v v8, v9
+; ZVFH64-NEXT:    ret
 ;
-; LMULMAX1RV32-LABEL: si2fp_v3i7_v3f32:
-; LMULMAX1RV32:       # %bb.0:
-; LMULMAX1RV32-NEXT:    lw a1, 4(a0)
-; LMULMAX1RV32-NEXT:    lw a2, 0(a0)
-; LMULMAX1RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1RV32-NEXT:    lw a0, 8(a0)
-; LMULMAX1RV32-NEXT:    vmv.v.x v8, a2
-; LMULMAX1RV32-NEXT:    vslide1down.vx v8, v8, a1
-; LMULMAX1RV32-NEXT:    vslide1down.vx v8, v8, a0
-; LMULMAX1RV32-NEXT:    vslidedown.vi v8, v8, 1
-; LMULMAX1RV32-NEXT:    vadd.vv v8, v8, v8
-; LMULMAX1RV32-NEXT:    vsra.vi v8, v8, 1
-; LMULMAX1RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; LMULMAX1RV32-NEXT:    vsext.vf2 v9, v8
-; LMULMAX1RV32-NEXT:    vfwcvt.f.x.v v8, v9
-; LMULMAX1RV32-NEXT:    ret
+; ZVFHMIN32-LABEL: si2fp_v3i7_v3f32:
+; ZVFHMIN32:       # %bb.0:
+; ZVFHMIN32-NEXT:    lw a1, 4(a0)
+; ZVFHMIN32-NEXT:    lw a2, 0(a0)
+; ZVFHMIN32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFHMIN32-NEXT:    lw a0, 8(a0)
+; ZVFHMIN32-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN32-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN32-NEXT:    vslide1down.vx v8, v8, a0
+; ZVFHMIN32-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFHMIN32-NEXT:    vadd.vv v8, v8, v8
+; ZVFHMIN32-NEXT:    vsra.vi v8, v8, 1
+; ZVFHMIN32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN32-NEXT:    vsext.vf2 v9, v8
+; ZVFHMIN32-NEXT:    vfwcvt.f.x.v v8, v9
+; ZVFHMIN32-NEXT:    ret
 ;
-; LMULMAX1RV64-LABEL: si2fp_v3i7_v3f32:
-; LMULMAX1RV64:       # %bb.0:
-; LMULMAX1RV64-NEXT:    ld a1, 8(a0)
-; LMULMAX1RV64-NEXT:    ld a2, 0(a0)
-; LMULMAX1RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1RV64-NEXT:    ld a0, 16(a0)
-; LMULMAX1RV64-NEXT:    vmv.v.x v8, a2
-; LMULMAX1RV64-NEXT:    vslide1down.vx v8, v8, a1
-; LMULMAX1RV64-NEXT:    vslide1down.vx v8, v8, a0
-; LMULMAX1RV64-NEXT:    vslidedown.vi v8, v8, 1
-; LMULMAX1RV64-NEXT:    vadd.vv v8, v8, v8
-; LMULMAX1RV64-NEXT:    vsra.vi v8, v8, 1
-; LMULMAX1RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; LMULMAX1RV64-NEXT:    vsext.vf2 v9, v8
-; LMULMAX1RV64-NEXT:    vfwcvt.f.x.v v8, v9
-; LMULMAX1RV64-NEXT:    ret
-;
-; LMULMAX8RV32ZVFHMIN-LABEL: si2fp_v3i7_v3f32:
-; LMULMAX8RV32ZVFHMIN:       # %bb.0:
-; LMULMAX8RV32ZVFHMIN-NEXT:    lw a1, 4(a0)
-; LMULMAX8RV32ZVFHMIN-NEXT:    lw a2, 0(a0)
-; LMULMAX8RV32ZVFHMIN-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV32ZVFHMIN-NEXT:    lw a0, 8(a0)
-; LMULMAX8RV32ZVFHMIN-NEXT:    vmv.v.x v8, a2
-; LMULMAX8RV32ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a1
-; LMULMAX8RV32ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; LMULMAX8RV32ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
-; LMULMAX8RV32ZVFHMIN-NEXT:    vadd.vv v8, v8, v8
-; LMULMAX8RV32ZVFHMIN-NEXT:    vsra.vi v8, v8, 1
-; LMULMAX8RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; LMULMAX8RV32ZVFHMIN-NEXT:    vsext.vf2 v9, v8
-; LMULMAX8RV32ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v9
-; LMULMAX8RV32ZVFHMIN-NEXT:    ret
-;
-; LMULMAX8RV64ZVFHMIN-LABEL: si2fp_v3i7_v3f32:
-; LMULMAX8RV64ZVFHMIN:       # %bb.0:
-; LMULMAX8RV64ZVFHMIN-NEXT:    ld a1, 8(a0)
-; LMULMAX8RV64ZVFHMIN-NEXT:    ld a2, 0(a0)
-; LMULMAX8RV64ZVFHMIN-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV64ZVFHMIN-NEXT:    ld a0, 16(a0)
-; LMULMAX8RV64ZVFHMIN-NEXT:    vmv.v.x v8, a2
-; LMULMAX8RV64ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a1
-; LMULMAX8RV64ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; LMULMAX8RV64ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
-; LMULMAX8RV64ZVFHMIN-NEXT:    vadd.vv v8, v8, v8
-; LMULMAX8RV64ZVFHMIN-NEXT:    vsra.vi v8, v8, 1
-; LMULMAX8RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; LMULMAX8RV64ZVFHMIN-NEXT:    vsext.vf2 v9, v8
-; LMULMAX8RV64ZVFHMIN-NEXT:    vfwcvt.f.x.v v8, v9
-; LMULMAX8RV64ZVFHMIN-NEXT:    ret
+; ZVFHMIN64-LABEL: si2fp_v3i7_v3f32:
+; ZVFHMIN64:       # %bb.0:
+; ZVFHMIN64-NEXT:    ld a1, 8(a0)
+; ZVFHMIN64-NEXT:    ld a2, 0(a0)
+; ZVFHMIN64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFHMIN64-NEXT:    ld a0, 16(a0)
+; ZVFHMIN64-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN64-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN64-NEXT:    vslide1down.vx v8, v8, a0
+; ZVFHMIN64-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFHMIN64-NEXT:    vadd.vv v8, v8, v8
+; ZVFHMIN64-NEXT:    vsra.vi v8, v8, 1
+; ZVFHMIN64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN64-NEXT:    vsext.vf2 v9, v8
+; ZVFHMIN64-NEXT:    vfwcvt.f.x.v v8, v9
+; ZVFHMIN64-NEXT:    ret
   %z = sitofp <3 x i7> %x to <3 x float>
   ret <3 x float> %z
 }
 
 ; FIXME: This gets expanded instead of widened + promoted
 define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) {
-; LMULMAX8RV32-LABEL: ui2fp_v3i7_v3f32:
-; LMULMAX8RV32:       # %bb.0:
-; LMULMAX8RV32-NEXT:    lw a1, 4(a0)
-; LMULMAX8RV32-NEXT:    lw a2, 0(a0)
-; LMULMAX8RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV32-NEXT:    lw a0, 8(a0)
-; LMULMAX8RV32-NEXT:    vmv.v.x v8, a2
-; LMULMAX8RV32-NEXT:    vslide1down.vx v8, v8, a1
-; LMULMAX8RV32-NEXT:    vslide1down.vx v8, v8, a0
-; LMULMAX8RV32-NEXT:    vslidedown.vi v8, v8, 1
-; LMULMAX8RV32-NEXT:    li a0, 127
-; LMULMAX8RV32-NEXT:    vand.vx v8, v8, a0
-; LMULMAX8RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; LMULMAX8RV32-NEXT:    vzext.vf2 v9, v8
-; LMULMAX8RV32-NEXT:    vfwcvt.f.xu.v v8, v9
-; LMULMAX8RV32-NEXT:    ret
-;
-; LMULMAX8RV64-LABEL: ui2fp_v3i7_v3f32:
-; LMULMAX8RV64:       # %bb.0:
-; LMULMAX8RV64-NEXT:    ld a1, 8(a0)
-; LMULMAX8RV64-NEXT:    ld a2, 0(a0)
-; LMULMAX8RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV64-NEXT:    ld a0, 16(a0)
-; LMULMAX8RV64-NEXT:    vmv.v.x v8, a2
-; LMULMAX8RV64-NEXT:    vslide1down.vx v8, v8, a1
-; LMULMAX8RV64-NEXT:    vslide1down.vx v8, v8, a0
-; LMULMAX8RV64-NEXT:    vslidedown.vi v8, v8, 1
-; LMULMAX8RV64-NEXT:    li a0, 127
-; LMULMAX8RV64-NEXT:    vand.vx v8, v8, a0
-; LMULMAX8RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; LMULMAX8RV64-NEXT:    vzext.vf2 v9, v8
-; LMULMAX8RV64-NEXT:    vfwcvt.f.xu.v v8, v9
-; LMULMAX8RV64-NEXT:    ret
-;
-; LMULMAX1RV32-LABEL: ui2fp_v3i7_v3f32:
-; LMULMAX1RV32:       # %bb.0:
-; LMULMAX1RV32-NEXT:    lw a1, 4(a0)
-; LMULMAX1RV32-NEXT:    lw a2, 0(a0)
-; LMULMAX1RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1RV32-NEXT:    lw a0, 8(a0)
-; LMULMAX1RV32-NEXT:    vmv.v.x v8, a2
-; LMULMAX1RV32-NEXT:    vslide1down.vx v8, v8, a1
-; LMULMAX1RV32-NEXT:    vslide1down.vx v8, v8, a0
-; LMULMAX1RV32-NEXT:    vslidedown.vi v8, v8, 1
-; LMULMAX1RV32-NEXT:    li a0, 127
-; LMULMAX1RV32-NEXT:    vand.vx v8, v8, a0
-; LMULMAX1RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; LMULMAX1RV32-NEXT:    vzext.vf2 v9, v8
-; LMULMAX1RV32-NEXT:    vfwcvt.f.xu.v v8, v9
-; LMULMAX1RV32-NEXT:    ret
+; ZVFH32-LABEL: ui2fp_v3i7_v3f32:
+; ZVFH32:       # %bb.0:
+; ZVFH32-NEXT:    lw a1, 4(a0)
+; ZVFH32-NEXT:    lw a2, 0(a0)
+; ZVFH32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFH32-NEXT:    lw a0, 8(a0)
+; ZVFH32-NEXT:    vmv.v.x v8, a2
+; ZVFH32-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFH32-NEXT:    vslide1down.vx v8, v8, a0
+; ZVFH32-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFH32-NEXT:    li a0, 127
+; ZVFH32-NEXT:    vand.vx v8, v8, a0
+; ZVFH32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH32-NEXT:    vzext.vf2 v9, v8
+; ZVFH32-NEXT:    vfwcvt.f.xu.v v8, v9
+; ZVFH32-NEXT:    ret
 ;
-; LMULMAX1RV64-LABEL: ui2fp_v3i7_v3f32:
-; LMULMAX1RV64:       # %bb.0:
-; LMULMAX1RV64-NEXT:    ld a1, 8(a0)
-; LMULMAX1RV64-NEXT:    ld a2, 0(a0)
-; LMULMAX1RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1RV64-NEXT:    ld a0, 16(a0)
-; LMULMAX1RV64-NEXT:    vmv.v.x v8, a2
-; LMULMAX1RV64-NEXT:    vslide1down.vx v8, v8, a1
-; LMULMAX1RV64-NEXT:    vslide1down.vx v8, v8, a0
-; LMULMAX1RV64-NEXT:    vslidedown.vi v8, v8, 1
-; LMULMAX1RV64-NEXT:    li a0, 127
-; LMULMAX1RV64-NEXT:    vand.vx v8, v8, a0
-; LMULMAX1RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; LMULMAX1RV64-NEXT:    vzext.vf2 v9, v8
-; LMULMAX1RV64-NEXT:    vfwcvt.f.xu.v v8, v9
-; LMULMAX1RV64-NEXT:    ret
+; ZVFH64-LABEL: ui2fp_v3i7_v3f32:
+; ZVFH64:       # %bb.0:
+; ZVFH64-NEXT:    ld a1, 8(a0)
+; ZVFH64-NEXT:    ld a2, 0(a0)
+; ZVFH64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFH64-NEXT:    ld a0, 16(a0)
+; ZVFH64-NEXT:    vmv.v.x v8, a2
+; ZVFH64-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFH64-NEXT:    vslide1down.vx v8, v8, a0
+; ZVFH64-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFH64-NEXT:    li a0, 127
+; ZVFH64-NEXT:    vand.vx v8, v8, a0
+; ZVFH64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH64-NEXT:    vzext.vf2 v9, v8
+; ZVFH64-NEXT:    vfwcvt.f.xu.v v8, v9
+; ZVFH64-NEXT:    ret
 ;
-; LMULMAX8RV32ZVFHMIN-LABEL: ui2fp_v3i7_v3f32:
-; LMULMAX8RV32ZVFHMIN:       # %bb.0:
-; LMULMAX8RV32ZVFHMIN-NEXT:    lw a1, 4(a0)
-; LMULMAX8RV32ZVFHMIN-NEXT:    lw a2, 0(a0)
-; LMULMAX8RV32ZVFHMIN-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV32ZVFHMIN-NEXT:    lw a0, 8(a0)
-; LMULMAX8RV32ZVFHMIN-NEXT:    vmv.v.x v8, a2
-; LMULMAX8RV32ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a1
-; LMULMAX8RV32ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; LMULMAX8RV32ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
-; LMULMAX8RV32ZVFHMIN-NEXT:    li a0, 127
-; LMULMAX8RV32ZVFHMIN-NEXT:    vand.vx v8, v8, a0
-; LMULMAX8RV32ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; LMULMAX8RV32ZVFHMIN-NEXT:    vzext.vf2 v9, v8
-; LMULMAX8RV32ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v9
-; LMULMAX8RV32ZVFHMIN-NEXT:    ret
+; ZVFHMIN32-LABEL: ui2fp_v3i7_v3f32:
+; ZVFHMIN32:       # %bb.0:
+; ZVFHMIN32-NEXT:    lw a1, 4(a0)
+; ZVFHMIN32-NEXT:    lw a2, 0(a0)
+; ZVFHMIN32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFHMIN32-NEXT:    lw a0, 8(a0)
+; ZVFHMIN32-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN32-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN32-NEXT:    vslide1down.vx v8, v8, a0
+; ZVFHMIN32-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFHMIN32-NEXT:    li a0, 127
+; ZVFHMIN32-NEXT:    vand.vx v8, v8, a0
+; ZVFHMIN32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN32-NEXT:    vzext.vf2 v9, v8
+; ZVFHMIN32-NEXT:    vfwcvt.f.xu.v v8, v9
+; ZVFHMIN32-NEXT:    ret
 ;
-; LMULMAX8RV64ZVFHMIN-LABEL: ui2fp_v3i7_v3f32:
-; LMULMAX8RV64ZVFHMIN:       # %bb.0:
-; LMULMAX8RV64ZVFHMIN-NEXT:    ld a1, 8(a0)
-; LMULMAX8RV64ZVFHMIN-NEXT:    ld a2, 0(a0)
-; LMULMAX8RV64ZVFHMIN-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX8RV64ZVFHMIN-NEXT:    ld a0, 16(a0)
-; LMULMAX8RV64ZVFHMIN-NEXT:    vmv.v.x v8, a2
-; LMULMAX8RV64ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a1
-; LMULMAX8RV64ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
-; LMULMAX8RV64ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
-; LMULMAX8RV64ZVFHMIN-NEXT:    li a0, 127
-; LMULMAX8RV64ZVFHMIN-NEXT:    vand.vx v8, v8, a0
-; LMULMAX8RV64ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; LMULMAX8RV64ZVFHMIN-NEXT:    vzext.vf2 v9, v8
-; LMULMAX8RV64ZVFHMIN-NEXT:    vfwcvt.f.xu.v v8, v9
-; LMULMAX8RV64ZVFHMIN-NEXT:    ret
+; ZVFHMIN64-LABEL: ui2fp_v3i7_v3f32:
+; ZVFHMIN64:       # %bb.0:
+; ZVFHMIN64-NEXT:    ld a1, 8(a0)
+; ZVFHMIN64-NEXT:    ld a2, 0(a0)
+; ZVFHMIN64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFHMIN64-NEXT:    ld a0, 16(a0)
+; ZVFHMIN64-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN64-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN64-NEXT:    vslide1down.vx v8, v8, a0
+; ZVFHMIN64-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFHMIN64-NEXT:    li a0, 127
+; ZVFHMIN64-NEXT:    vand.vx v8, v8, a0
+; ZVFHMIN64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN64-NEXT:    vzext.vf2 v9, v8
+; ZVFHMIN64-NEXT:    vfwcvt.f.xu.v v8, v9
+; ZVFHMIN64-NEXT:    ret
   %z = uitofp <3 x i7> %x to <3 x float>
   ret <3 x float> %z
 }
@@ -357,26 +287,13 @@ define <3 x float> @ui2fp_v3i1_v3f32(<3 x i1> %x) {
 }
 
 define void @si2fp_v8i32_v8f32(ptr %x, ptr %y) {
-; LMULMAX8-LABEL: si2fp_v8i32_v8f32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    vfcvt.f.x.v v8, v8
-; LMULMAX8-NEXT:    vse32.v v8, (a1)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: si2fp_v8i32_v8f32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    addi a2, a0, 16
-; LMULMAX1-NEXT:    vle32.v v8, (a2)
-; LMULMAX1-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-NEXT:    vfcvt.f.x.v v8, v8
-; LMULMAX1-NEXT:    vfcvt.f.x.v v9, v9
-; LMULMAX1-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-NEXT:    addi a1, a1, 16
-; LMULMAX1-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: si2fp_v8i32_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
+; CHECK-NEXT:    vse32.v v8, (a1)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %d = sitofp <8 x i32> %a to <8 x float>
   store <8 x float> %d, ptr %y
@@ -384,26 +301,13 @@ define void @si2fp_v8i32_v8f32(ptr %x, ptr %y) {
 }
 
 define void @ui2fp_v8i32_v8f32(ptr %x, ptr %y) {
-; LMULMAX8-LABEL: ui2fp_v8i32_v8f32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX8-NEXT:    vse32.v v8, (a1)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: ui2fp_v8i32_v8f32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    addi a2, a0, 16
-; LMULMAX1-NEXT:    vle32.v v8, (a2)
-; LMULMAX1-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX1-NEXT:    vfcvt.f.xu.v v9, v9
-; LMULMAX1-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-NEXT:    addi a1, a1, 16
-; LMULMAX1-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ui2fp_v8i32_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
+; CHECK-NEXT:    vse32.v v8, (a1)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %d = uitofp <8 x i32> %a to <8 x float>
   store <8 x float> %d, ptr %y
@@ -411,61 +315,25 @@ define void @ui2fp_v8i32_v8f32(ptr %x, ptr %y) {
 }
 
 define <8 x float> @si2fp_v8i1_v8f32(<8 x i1> %x) {
-; LMULMAX8-LABEL: si2fp_v8i1_v8f32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX8-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-NEXT:    vmerge.vim v10, v8, -1, v0
-; LMULMAX8-NEXT:    vfwcvt.f.x.v v8, v10
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: si2fp_v8i1_v8f32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-NEXT:    vmerge.vim v10, v9, -1, v0
-; LMULMAX1-NEXT:    vfwcvt.f.x.v v8, v10
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v10, 0
-; LMULMAX1-NEXT:    vmerge.vim v10, v10, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v10, v10, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vmsne.vi v0, v10, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vmerge.vim v10, v9, -1, v0
-; LMULMAX1-NEXT:    vfwcvt.f.x.v v9, v10
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: si2fp_v8i1_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vmerge.vim v10, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v10
+; CHECK-NEXT:    ret
   %z = sitofp <8 x i1> %x to <8 x float>
   ret <8 x float> %z
 }
 
 define <8 x float> @ui2fp_v8i1_v8f32(<8 x i1> %x) {
-; LMULMAX8-LABEL: ui2fp_v8i1_v8f32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX8-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-NEXT:    vmerge.vim v10, v8, 1, v0
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v8, v10
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: ui2fp_v8i1_v8f32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-NEXT:    vmerge.vim v10, v9, 1, v0
-; LMULMAX1-NEXT:    vfwcvt.f.xu.v v8, v10
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v10, 0
-; LMULMAX1-NEXT:    vmerge.vim v10, v10, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v10, v10, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vmsne.vi v0, v10, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vmerge.vim v10, v9, 1, v0
-; LMULMAX1-NEXT:    vfwcvt.f.xu.v v9, v10
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ui2fp_v8i1_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT:    ret
   %z = uitofp <8 x i1> %x to <8 x float>
   ret <8 x float> %z
 }
@@ -501,44 +369,14 @@ define void @ui2fp_v2i16_v2f64(ptr %x, ptr %y) {
 }
 
 define void @si2fp_v8i16_v8f64(ptr %x, ptr %y) {
-; LMULMAX8-LABEL: si2fp_v8i16_v8f64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-NEXT:    vsext.vf2 v10, v8
-; LMULMAX8-NEXT:    vfwcvt.f.x.v v12, v10
-; LMULMAX8-NEXT:    vse64.v v12, (a1)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: si2fp_v8i16_v8f64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vsext.vf2 v10, v9
-; LMULMAX1-NEXT:    vfwcvt.f.x.v v9, v10
-; LMULMAX1-NEXT:    vsext.vf2 v10, v8
-; LMULMAX1-NEXT:    vfwcvt.f.x.v v11, v10
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v8, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vsext.vf2 v10, v8
-; LMULMAX1-NEXT:    vfwcvt.f.x.v v12, v10
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vsext.vf2 v10, v8
-; LMULMAX1-NEXT:    vfwcvt.f.x.v v8, v10
-; LMULMAX1-NEXT:    addi a0, a1, 48
-; LMULMAX1-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, a1, 32
-; LMULMAX1-NEXT:    vse64.v v12, (a0)
-; LMULMAX1-NEXT:    vse64.v v11, (a1)
-; LMULMAX1-NEXT:    addi a1, a1, 16
-; LMULMAX1-NEXT:    vse64.v v9, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: si2fp_v8i16_v8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vfwcvt.f.x.v v12, v10
+; CHECK-NEXT:    vse64.v v12, (a1)
+; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
   %d = sitofp <8 x i16> %a to <8 x double>
   store <8 x double> %d, ptr %y
@@ -546,44 +384,14 @@ define void @si2fp_v8i16_v8f64(ptr %x, ptr %y) {
 }
 
 define void @ui2fp_v8i16_v8f64(ptr %x, ptr %y) {
-; LMULMAX8-LABEL: ui2fp_v8i16_v8f64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-NEXT:    vzext.vf2 v10, v8
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v12, v10
-; LMULMAX8-NEXT:    vse64.v v12, (a1)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: ui2fp_v8i16_v8f64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vzext.vf2 v10, v9
-; LMULMAX1-NEXT:    vfwcvt.f.xu.v v9, v10
-; LMULMAX1-NEXT:    vzext.vf2 v10, v8
-; LMULMAX1-NEXT:    vfwcvt.f.xu.v v11, v10
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v8, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vzext.vf2 v10, v8
-; LMULMAX1-NEXT:    vfwcvt.f.xu.v v12, v10
-; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vzext.vf2 v10, v8
-; LMULMAX1-NEXT:    vfwcvt.f.xu.v v8, v10
-; LMULMAX1-NEXT:    addi a0, a1, 48
-; LMULMAX1-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, a1, 32
-; LMULMAX1-NEXT:    vse64.v v12, (a0)
-; LMULMAX1-NEXT:    vse64.v v11, (a1)
-; LMULMAX1-NEXT:    addi a1, a1, 16
-; LMULMAX1-NEXT:    vse64.v v9, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ui2fp_v8i16_v8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v12, v10
+; CHECK-NEXT:    vse64.v v12, (a1)
+; CHECK-NEXT:    ret
   %a = load <8 x i16>, ptr %x
   %d = uitofp <8 x i16> %a to <8 x double>
   store <8 x double> %d, ptr %y
@@ -591,103 +399,25 @@ define void @ui2fp_v8i16_v8f64(ptr %x, ptr %y) {
 }
 
 define <8 x double> @si2fp_v8i1_v8f64(<8 x i1> %x) {
-; LMULMAX8-LABEL: si2fp_v8i1_v8f64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-NEXT:    vmerge.vim v12, v8, -1, v0
-; LMULMAX8-NEXT:    vfwcvt.f.x.v v8, v12
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: si2fp_v8i1_v8f64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vmv1r.v v10, v0
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v11, 0
-; LMULMAX1-NEXT:    vmerge.vim v9, v11, -1, v0
-; LMULMAX1-NEXT:    vfwcvt.f.x.v v8, v9
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v12, 0
-; LMULMAX1-NEXT:    vmerge.vim v9, v12, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v9, v9, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vmsne.vi v0, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vmerge.vim v13, v11, -1, v0
-; LMULMAX1-NEXT:    vfwcvt.f.x.v v9, v13
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v13, 0
-; LMULMAX1-NEXT:    vmv1r.v v0, v10
-; LMULMAX1-NEXT:    vmerge.vim v10, v13, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v10, v10, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vmsne.vi v0, v10, 0
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vmerge.vim v13, v11, -1, v0
-; LMULMAX1-NEXT:    vfwcvt.f.x.v v10, v13
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vmerge.vim v12, v12, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v12, v12, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vmsne.vi v0, v12, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vmerge.vim v12, v11, -1, v0
-; LMULMAX1-NEXT:    vfwcvt.f.x.v v11, v12
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: si2fp_v8i1_v8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vmerge.vim v12, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v8, v12
+; CHECK-NEXT:    ret
   %z = sitofp <8 x i1> %x to <8 x double>
   ret <8 x double> %z
 }
 
 define <8 x double> @ui2fp_v8i1_v8f64(<8 x i1> %x) {
-; LMULMAX8-LABEL: ui2fp_v8i1_v8f64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-NEXT:    vmerge.vim v12, v8, 1, v0
-; LMULMAX8-NEXT:    vfwcvt.f.xu.v v8, v12
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: ui2fp_v8i1_v8f64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vmv1r.v v10, v0
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v11, 0
-; LMULMAX1-NEXT:    vmerge.vim v9, v11, 1, v0
-; LMULMAX1-NEXT:    vfwcvt.f.xu.v v8, v9
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v12, 0
-; LMULMAX1-NEXT:    vmerge.vim v9, v12, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v9, v9, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vmsne.vi v0, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vmerge.vim v13, v11, 1, v0
-; LMULMAX1-NEXT:    vfwcvt.f.xu.v v9, v13
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v13, 0
-; LMULMAX1-NEXT:    vmv1r.v v0, v10
-; LMULMAX1-NEXT:    vmerge.vim v10, v13, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v10, v10, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vmsne.vi v0, v10, 0
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vmerge.vim v13, v11, 1, v0
-; LMULMAX1-NEXT:    vfwcvt.f.xu.v v10, v13
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vmerge.vim v12, v12, 1, v0
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v12, v12, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vmsne.vi v0, v12, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vmerge.vim v12, v11, 1, v0
-; LMULMAX1-NEXT:    vfwcvt.f.xu.v v11, v12
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ui2fp_v8i1_v8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vmerge.vim v12, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT:    ret
   %z = uitofp <8 x i1> %x to <8 x double>
   ret <8 x double> %z
 }
@@ -725,107 +455,57 @@ define void @ui2fp_v2i64_v2f16(ptr %x, ptr %y) {
 }
 
 define <2 x half> @si2fp_v2i1_v2f16(<2 x i1> %x) {
-; LMULMAX8ZVFH-LABEL: si2fp_v2i1_v2f16:
-; LMULMAX8ZVFH:       # %bb.0:
-; LMULMAX8ZVFH-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; LMULMAX8ZVFH-NEXT:    vmv.v.i v8, 0
-; LMULMAX8ZVFH-NEXT:    vmerge.vim v9, v8, -1, v0
-; LMULMAX8ZVFH-NEXT:    vfwcvt.f.x.v v8, v9
-; LMULMAX8ZVFH-NEXT:    ret
-;
-; LMULMAX1-LABEL: si2fp_v2i1_v2f16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vmerge.vim v9, v8, -1, v0
-; LMULMAX1-NEXT:    vfwcvt.f.x.v v8, v9
-; LMULMAX1-NEXT:    ret
+; ZVFH-LABEL: si2fp_v2i1_v2f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
+; ZVFH-NEXT:    vmv.v.i v8, 0
+; ZVFH-NEXT:    vmerge.vim v9, v8, -1, v0
+; ZVFH-NEXT:    vfwcvt.f.x.v v8, v9
+; ZVFH-NEXT:    ret
 ;
-; LMULMAX8ZVFHMIN-LABEL: si2fp_v2i1_v2f16:
-; LMULMAX8ZVFHMIN:       # %bb.0:
-; LMULMAX8ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; LMULMAX8ZVFHMIN-NEXT:    vmv.v.i v8, 0
-; LMULMAX8ZVFHMIN-NEXT:    vmerge.vim v8, v8, -1, v0
-; LMULMAX8ZVFHMIN-NEXT:    vfwcvt.f.x.v v9, v8
-; LMULMAX8ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
-; LMULMAX8ZVFHMIN-NEXT:    ret
+; ZVFHMIN-LABEL: si2fp_v2i1_v2f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.i v8, 0
+; ZVFHMIN-NEXT:    vmerge.vim v8, v8, -1, v0
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v9, v8
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
   %z = sitofp <2 x i1> %x to <2 x half>
   ret <2 x half> %z
 }
 
 define <2 x half> @ui2fp_v2i1_v2f16(<2 x i1> %x) {
-; LMULMAX8ZVFH-LABEL: ui2fp_v2i1_v2f16:
-; LMULMAX8ZVFH:       # %bb.0:
-; LMULMAX8ZVFH-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; LMULMAX8ZVFH-NEXT:    vmv.v.i v8, 0
-; LMULMAX8ZVFH-NEXT:    vmerge.vim v9, v8, 1, v0
-; LMULMAX8ZVFH-NEXT:    vfwcvt.f.xu.v v8, v9
-; LMULMAX8ZVFH-NEXT:    ret
+; ZVFH-LABEL: ui2fp_v2i1_v2f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
+; ZVFH-NEXT:    vmv.v.i v8, 0
+; ZVFH-NEXT:    vmerge.vim v9, v8, 1, v0
+; ZVFH-NEXT:    vfwcvt.f.xu.v v8, v9
+; ZVFH-NEXT:    ret
 ;
-; LMULMAX1-LABEL: ui2fp_v2i1_v2f16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vmerge.vim v9, v8, 1, v0
-; LMULMAX1-NEXT:    vfwcvt.f.xu.v v8, v9
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8ZVFHMIN-LABEL: ui2fp_v2i1_v2f16:
-; LMULMAX8ZVFHMIN:       # %bb.0:
-; LMULMAX8ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; LMULMAX8ZVFHMIN-NEXT:    vmv.v.i v8, 0
-; LMULMAX8ZVFHMIN-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX8ZVFHMIN-NEXT:    vfwcvt.f.xu.v v9, v8
-; LMULMAX8ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
-; LMULMAX8ZVFHMIN-NEXT:    ret
+; ZVFHMIN-LABEL: ui2fp_v2i1_v2f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.i v8, 0
+; ZVFHMIN-NEXT:    vmerge.vim v8, v8, 1, v0
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v9, v8
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
   %z = uitofp <2 x i1> %x to <2 x half>
   ret <2 x half> %z
 }
 
 define void @si2fp_v8i64_v8f16(ptr %x, ptr %y) {
-; LMULMAX8-LABEL: si2fp_v8i64_v8f16:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vle64.v v8, (a0)
-; LMULMAX8-NEXT:    vfncvt.f.x.w v12, v8
-; LMULMAX8-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; LMULMAX8-NEXT:    vfncvt.f.f.w v8, v12
-; LMULMAX8-NEXT:    vse16.v v8, (a1)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: si2fp_v8i64_v8f16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a2, a0, 48
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vle64.v v8, (a2)
-; LMULMAX1-NEXT:    addi a2, a0, 32
-; LMULMAX1-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle64.v v11, (a0)
-; LMULMAX1-NEXT:    vfncvt.f.x.w v12, v9
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vfncvt.f.f.w v9, v12
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.f.x.w v12, v11
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vfncvt.f.f.w v11, v12
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v9, v11, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.f.x.w v11, v10
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vfncvt.f.f.w v10, v11
-; LMULMAX1-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v9, v10, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.f.x.w v10, v8
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vfncvt.f.f.w v8, v10
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v9, v8, 6
-; LMULMAX1-NEXT:    vse16.v v9, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: si2fp_v8i64_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vfncvt.f.x.w v12, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v8, v12
+; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    ret
   %a = load <8 x i64>, ptr %x
   %d = sitofp <8 x i64> %a to <8 x half>
   store <8 x half> %d, ptr %y
@@ -833,49 +513,15 @@ define void @si2fp_v8i64_v8f16(ptr %x, ptr %y) {
 }
 
 define void @ui2fp_v8i64_v8f16(ptr %x, ptr %y) {
-; LMULMAX8-LABEL: ui2fp_v8i64_v8f16:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vle64.v v8, (a0)
-; LMULMAX8-NEXT:    vfncvt.f.xu.w v12, v8
-; LMULMAX8-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; LMULMAX8-NEXT:    vfncvt.f.f.w v8, v12
-; LMULMAX8-NEXT:    vse16.v v8, (a1)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX1-LABEL: ui2fp_v8i64_v8f16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a2, a0, 48
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vle64.v v8, (a2)
-; LMULMAX1-NEXT:    addi a2, a0, 32
-; LMULMAX1-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle64.v v11, (a0)
-; LMULMAX1-NEXT:    vfncvt.f.xu.w v12, v9
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vfncvt.f.f.w v9, v12
-; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.f.xu.w v12, v11
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vfncvt.f.f.w v11, v12
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v9, v11, 2
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.f.xu.w v11, v10
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vfncvt.f.f.w v10, v11
-; LMULMAX1-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v9, v10, 4
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vfncvt.f.xu.w v10, v8
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; LMULMAX1-NEXT:    vfncvt.f.f.w v8, v10
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v9, v8, 6
-; LMULMAX1-NEXT:    vse16.v v9, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: ui2fp_v8i64_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vfncvt.f.xu.w v12, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v8, v12
+; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    ret
   %a = load <8 x i64>, ptr %x
   %d = uitofp <8 x i64> %a to <8 x half>
   store <8 x half> %d, ptr %y
@@ -883,59 +529,43 @@ define void @ui2fp_v8i64_v8f16(ptr %x, ptr %y) {
 }
 
 define <8 x half> @si2fp_v8i1_v8f16(<8 x i1> %x) {
-; LMULMAX8ZVFH-LABEL: si2fp_v8i1_v8f16:
-; LMULMAX8ZVFH:       # %bb.0:
-; LMULMAX8ZVFH-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX8ZVFH-NEXT:    vmv.v.i v8, 0
-; LMULMAX8ZVFH-NEXT:    vmerge.vim v9, v8, -1, v0
-; LMULMAX8ZVFH-NEXT:    vfwcvt.f.x.v v8, v9
-; LMULMAX8ZVFH-NEXT:    ret
-;
-; LMULMAX1-LABEL: si2fp_v8i1_v8f16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vmerge.vim v9, v8, -1, v0
-; LMULMAX1-NEXT:    vfwcvt.f.x.v v8, v9
-; LMULMAX1-NEXT:    ret
+; ZVFH-LABEL: si2fp_v8i1_v8f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; ZVFH-NEXT:    vmv.v.i v8, 0
+; ZVFH-NEXT:    vmerge.vim v9, v8, -1, v0
+; ZVFH-NEXT:    vfwcvt.f.x.v v8, v9
+; ZVFH-NEXT:    ret
 ;
-; LMULMAX8ZVFHMIN-LABEL: si2fp_v8i1_v8f16:
-; LMULMAX8ZVFHMIN:       # %bb.0:
-; LMULMAX8ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX8ZVFHMIN-NEXT:    vmv.v.i v8, 0
-; LMULMAX8ZVFHMIN-NEXT:    vmerge.vim v8, v8, -1, v0
-; LMULMAX8ZVFHMIN-NEXT:    vfwcvt.f.x.v v10, v8
-; LMULMAX8ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
-; LMULMAX8ZVFHMIN-NEXT:    ret
+; ZVFHMIN-LABEL: si2fp_v8i1_v8f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.i v8, 0
+; ZVFHMIN-NEXT:    vmerge.vim v8, v8, -1, v0
+; ZVFHMIN-NEXT:    vfwcvt.f.x.v v10, v8
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
   %z = sitofp <8 x i1> %x to <8 x half>
   ret <8 x half> %z
 }
 
 define <8 x half> @ui2fp_v8i1_v8f16(<8 x i1> %x) {
-; LMULMAX8ZVFH-LABEL: ui2fp_v8i1_v8f16:
-; LMULMAX8ZVFH:       # %bb.0:
-; LMULMAX8ZVFH-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX8ZVFH-NEXT:    vmv.v.i v8, 0
-; LMULMAX8ZVFH-NEXT:    vmerge.vim v9, v8, 1, v0
-; LMULMAX8ZVFH-NEXT:    vfwcvt.f.xu.v v8, v9
-; LMULMAX8ZVFH-NEXT:    ret
-;
-; LMULMAX1-LABEL: ui2fp_v8i1_v8f16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vmerge.vim v9, v8, 1, v0
-; LMULMAX1-NEXT:    vfwcvt.f.xu.v v8, v9
-; LMULMAX1-NEXT:    ret
+; ZVFH-LABEL: ui2fp_v8i1_v8f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; ZVFH-NEXT:    vmv.v.i v8, 0
+; ZVFH-NEXT:    vmerge.vim v9, v8, 1, v0
+; ZVFH-NEXT:    vfwcvt.f.xu.v v8, v9
+; ZVFH-NEXT:    ret
 ;
-; LMULMAX8ZVFHMIN-LABEL: ui2fp_v8i1_v8f16:
-; LMULMAX8ZVFHMIN:       # %bb.0:
-; LMULMAX8ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX8ZVFHMIN-NEXT:    vmv.v.i v8, 0
-; LMULMAX8ZVFHMIN-NEXT:    vmerge.vim v8, v8, 1, v0
-; LMULMAX8ZVFHMIN-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX8ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
-; LMULMAX8ZVFHMIN-NEXT:    ret
+; ZVFHMIN-LABEL: ui2fp_v8i1_v8f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.i v8, 0
+; ZVFHMIN-NEXT:    vmerge.vim v8, v8, 1, v0
+; ZVFHMIN-NEXT:    vfwcvt.f.xu.v v10, v8
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
   %z = uitofp <8 x i1> %x to <8 x half>
   ret <8 x half> %z
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
index a77c49c942561b..45c6268d8b3622 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -1,13 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 8 x i32> @insert_nxv8i32_v2i32_0(<vscale x 8 x i32> %vec, ptr %svp) {
 ; CHECK-LABEL: insert_nxv8i32_v2i32_0:
@@ -49,50 +45,26 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_6(<vscale x 8 x i32> %vec, ptr %
 }
 
 define <vscale x 8 x i32> @insert_nxv8i32_v8i32_0(<vscale x 8 x i32> %vec, ptr %svp) {
-; LMULMAX2-LABEL: insert_nxv8i32_v8i32_0:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v12, (a0)
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m4, tu, ma
-; LMULMAX2-NEXT:    vmv.v.v v8, v12
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: insert_nxv8i32_v8i32_0:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v12, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle32.v v16, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m4, tu, ma
-; LMULMAX1-NEXT:    vmv.v.v v8, v12
-; LMULMAX1-NEXT:    vsetivli zero, 8, e32, m4, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v16, 4
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: insert_nxv8i32_v8i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e32, m4, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    ret
   %sv = load <8 x i32>, ptr %svp
   %v = call <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 0)
   ret <vscale x 8 x i32> %v
 }
 
 define <vscale x 8 x i32> @insert_nxv8i32_v8i32_8(<vscale x 8 x i32> %vec, ptr %svp) {
-; LMULMAX2-LABEL: insert_nxv8i32_v8i32_8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v12, (a0)
-; LMULMAX2-NEXT:    vsetivli zero, 16, e32, m4, tu, ma
-; LMULMAX2-NEXT:    vslideup.vi v8, v12, 8
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: insert_nxv8i32_v8i32_8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v12, (a1)
-; LMULMAX1-NEXT:    vle32.v v16, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 12, e32, m4, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v16, 8
-; LMULMAX1-NEXT:    vsetivli zero, 16, e32, m4, tu, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v12, 12
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: insert_nxv8i32_v8i32_8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, tu, ma
+; CHECK-NEXT:    vslideup.vi v8, v12, 8
+; CHECK-NEXT:    ret
   %sv = load <8 x i32>, ptr %svp
   %v = call <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 8)
   ret <vscale x 8 x i32> %v
@@ -160,29 +132,17 @@ define void @insert_v4i32_undef_v2i32_0(ptr %vp, ptr %svp) {
 }
 
 define void @insert_v8i32_v2i32_0(ptr %vp, ptr %svp) {
-; LMULMAX2-LABEL: insert_v8i32_v2i32_0:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a1)
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v10, (a0)
-; LMULMAX2-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
-; LMULMAX2-NEXT:    vmv.v.v v10, v8
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vse32.v v10, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: insert_v8i32_v2i32_0:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vle32.v v8, (a1)
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; LMULMAX1-NEXT:    vmv.v.v v9, v8
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vse32.v v9, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: insert_v8i32_v2i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
+; CHECK-NEXT:    vmv.v.v v10, v8
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vse32.v v10, (a0)
+; CHECK-NEXT:    ret
   %sv = load <2 x i32>, ptr %svp
   %vec = load <8 x i32>, ptr %vp
   %v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 0)
@@ -191,27 +151,17 @@ define void @insert_v8i32_v2i32_0(ptr %vp, ptr %svp) {
 }
 
 define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) {
-; LMULMAX2-LABEL: insert_v8i32_v2i32_2:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a1)
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v10, (a0)
-; LMULMAX2-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
-; LMULMAX2-NEXT:    vslideup.vi v10, v8, 2
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vse32.v v10, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: insert_v8i32_v2i32_2:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vle32.v v8, (a1)
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-NEXT:    vslideup.vi v9, v8, 2
-; LMULMAX1-NEXT:    vse32.v v9, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: insert_v8i32_v2i32_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
+; CHECK-NEXT:    vslideup.vi v10, v8, 2
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vse32.v v10, (a0)
+; CHECK-NEXT:    ret
   %sv = load <2 x i32>, ptr %svp
   %vec = load <8 x i32>, ptr %vp
   %v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 2)
@@ -220,26 +170,15 @@ define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) {
 }
 
 define void @insert_v8i32_v2i32_6(ptr %vp, ptr %svp) {
-; LMULMAX2-LABEL: insert_v8i32_v2i32_6:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a1)
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v10, (a0)
-; LMULMAX2-NEXT:    vslideup.vi v10, v8, 6
-; LMULMAX2-NEXT:    vse32.v v10, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: insert_v8i32_v2i32_6:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vle32.v v8, (a1)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-NEXT:    vslideup.vi v9, v8, 2
-; LMULMAX1-NEXT:    vse32.v v9, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: insert_v8i32_v2i32_6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vslideup.vi v10, v8, 6
+; CHECK-NEXT:    vse32.v v10, (a0)
+; CHECK-NEXT:    ret
   %sv = load <2 x i32>, ptr %svp
   %vec = load <8 x i32>, ptr %vp
   %v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 6)
@@ -248,24 +187,14 @@ define void @insert_v8i32_v2i32_6(ptr %vp, ptr %svp) {
 }
 
 define void @insert_v8i32_undef_v2i32_6(ptr %vp, ptr %svp) {
-; LMULMAX2-LABEL: insert_v8i32_undef_v2i32_6:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a1)
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vslideup.vi v10, v8, 6
-; LMULMAX2-NEXT:    vse32.v v10, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: insert_v8i32_undef_v2i32_6:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-NEXT:    vle32.v v8, (a1)
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v9, v8, 2
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vse32.v v9, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: insert_v8i32_undef_v2i32_6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vslideup.vi v10, v8, 6
+; CHECK-NEXT:    vse32.v v10, (a0)
+; CHECK-NEXT:    ret
   %sv = load <2 x i32>, ptr %svp
   %v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> undef, <2 x i32> %sv, i64 6)
   store <8 x i32> %v, ptr %vp
@@ -310,30 +239,18 @@ define void @insert_v4i16_v2i16_2(ptr %vp, ptr %svp) {
 }
 
 define void @insert_v32i1_v8i1_0(ptr %vp, ptr %svp) {
-; LMULMAX2-LABEL: insert_v32i1_v8i1_0:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vlm.v v8, (a0)
-; LMULMAX2-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX2-NEXT:    vlm.v v9, (a1)
-; LMULMAX2-NEXT:    vsetivli zero, 1, e8, mf4, tu, ma
-; LMULMAX2-NEXT:    vmv.v.v v8, v9
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vsm.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: insert_v32i1_v8i1_0:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vlm.v v8, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vlm.v v9, (a1)
-; LMULMAX1-NEXT:    vsetivli zero, 1, e8, mf8, tu, ma
-; LMULMAX1-NEXT:    vmv.v.v v8, v9
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vsm.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: insert_v32i1_v8i1_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vlm.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vlm.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf4, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vsm.v v8, (a0)
+; CHECK-NEXT:    ret
   %v = load <32 x i1>, ptr %vp
   %sv = load <8 x i1>, ptr %svp
   %c = call <32 x i1> @llvm.vector.insert.v8i1.v32i1(<32 x i1> %v, <8 x i1> %sv, i64 0)
@@ -342,31 +259,18 @@ define void @insert_v32i1_v8i1_0(ptr %vp, ptr %svp) {
 }
 
 define void @insert_v32i1_v8i1_16(ptr %vp, ptr %svp) {
-; LMULMAX2-LABEL: insert_v32i1_v8i1_16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vlm.v v8, (a0)
-; LMULMAX2-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX2-NEXT:    vlm.v v9, (a1)
-; LMULMAX2-NEXT:    vsetivli zero, 3, e8, mf4, tu, ma
-; LMULMAX2-NEXT:    vslideup.vi v8, v9, 2
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vsm.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: insert_v32i1_v8i1_16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a0, a0, 2
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vlm.v v8, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vlm.v v9, (a1)
-; LMULMAX1-NEXT:    vsetivli zero, 1, e8, mf8, tu, ma
-; LMULMAX1-NEXT:    vmv.v.v v8, v9
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vsm.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: insert_v32i1_v8i1_16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vlm.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vlm.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 3, e8, mf4, tu, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 2
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vsm.v v8, (a0)
+; CHECK-NEXT:    ret
   %v = load <32 x i1>, ptr %vp
   %sv = load <8 x i1>, ptr %svp
   %c = call <32 x i1> @llvm.vector.insert.v8i1.v32i1(<32 x i1> %v, <8 x i1> %sv, i64 16)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
index 2c3bc2ef4fe564..c65f6e5fa7866f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
@@ -1,10 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
 
 define void @sext_v4i8_v4i32(ptr %x, ptr %z) {
 ; CHECK-LABEL: sext_v4i8_v4i32:
@@ -35,36 +31,13 @@ define void @zext_v4i8_v4i32(ptr %x, ptr %z) {
 }
 
 define void @sext_v8i8_v8i32(ptr %x, ptr %z) {
-; LMULMAX8-LABEL: sext_v8i8_v8i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vle8.v v8, (a0)
-; LMULMAX8-NEXT:    vsext.vf4 v10, v8
-; LMULMAX8-NEXT:    vse32.v v10, (a1)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX2-LABEL: sext_v8i8_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vsext.vf4 v10, v8
-; LMULMAX2-NEXT:    vse32.v v10, (a1)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: sext_v8i8_v8i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v9, v8
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v8, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v10, v8
-; LMULMAX1-NEXT:    addi a0, a1, 16
-; LMULMAX1-NEXT:    vse32.v v10, (a0)
-; LMULMAX1-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: sext_v8i8_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vse32.v v10, (a1)
+; CHECK-NEXT:    ret
   %a = load <8 x i8>, ptr %x
   %b = sext <8 x i8> %a to <8 x i32>
   store <8 x i32> %b, ptr %z
@@ -72,90 +45,14 @@ define void @sext_v8i8_v8i32(ptr %x, ptr %z) {
 }
 
 define void @sext_v32i8_v32i32(ptr %x, ptr %z) {
-; LMULMAX8-LABEL: sext_v32i8_v32i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a2, 32
-; LMULMAX8-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; LMULMAX8-NEXT:    vle8.v v8, (a0)
-; LMULMAX8-NEXT:    vsext.vf4 v16, v8
-; LMULMAX8-NEXT:    vse32.v v16, (a1)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX2-LABEL: sext_v32i8_v32i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; LMULMAX2-NEXT:    vslidedown.vi v10, v8, 8
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vsext.vf4 v12, v10
-; LMULMAX2-NEXT:    vsext.vf4 v10, v8
-; LMULMAX2-NEXT:    vsetivli zero, 16, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vslidedown.vi v8, v8, 16
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vsext.vf4 v14, v8
-; LMULMAX2-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; LMULMAX2-NEXT:    vslidedown.vi v8, v8, 8
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vsext.vf4 v16, v8
-; LMULMAX2-NEXT:    addi a0, a1, 96
-; LMULMAX2-NEXT:    vse32.v v16, (a0)
-; LMULMAX2-NEXT:    addi a0, a1, 64
-; LMULMAX2-NEXT:    vse32.v v14, (a0)
-; LMULMAX2-NEXT:    vse32.v v10, (a1)
-; LMULMAX2-NEXT:    addi a0, a1, 32
-; LMULMAX2-NEXT:    vse32.v v12, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: sext_v32i8_v32i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    addi a2, a0, 16
-; LMULMAX1-NEXT:    vle8.v v8, (a2)
-; LMULMAX1-NEXT:    vle8.v v9, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v10, v8, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v11, v10
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v10, v9, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v12, v10
-; LMULMAX1-NEXT:    vsext.vf4 v10, v8
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v8, 8
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v13, v8
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v8, v8, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v14, v8
-; LMULMAX1-NEXT:    vsext.vf4 v8, v9
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v9, v9, 8
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v15, v9
-; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslidedown.vi v9, v9, 4
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vsext.vf4 v16, v9
-; LMULMAX1-NEXT:    addi a0, a1, 48
-; LMULMAX1-NEXT:    vse32.v v16, (a0)
-; LMULMAX1-NEXT:    addi a0, a1, 32
-; LMULMAX1-NEXT:    vse32.v v15, (a0)
-; LMULMAX1-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-NEXT:    addi a0, a1, 112
-; LMULMAX1-NEXT:    vse32.v v14, (a0)
-; LMULMAX1-NEXT:    addi a0, a1, 96
-; LMULMAX1-NEXT:    vse32.v v13, (a0)
-; LMULMAX1-NEXT:    addi a0, a1, 64
-; LMULMAX1-NEXT:    vse32.v v10, (a0)
-; LMULMAX1-NEXT:    addi a0, a1, 16
-; LMULMAX1-NEXT:    vse32.v v12, (a0)
-; LMULMAX1-NEXT:    addi a0, a1, 80
-; LMULMAX1-NEXT:    vse32.v v11, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: sext_v32i8_v32i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vsext.vf4 v16, v8
+; CHECK-NEXT:    vse32.v v16, (a1)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = sext <32 x i8> %a to <32 x i32>
   store <32 x i32> %b, ptr %z
@@ -179,43 +76,15 @@ define void @trunc_v4i8_v4i32(ptr %x, ptr %z) {
 }
 
 define void @trunc_v8i8_v8i32(ptr %x, ptr %z) {
-; LMULMAX8-LABEL: trunc_v8i8_v8i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX8-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-NEXT:    vnsrl.wi v10, v8, 0
-; LMULMAX8-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; LMULMAX8-NEXT:    vnsrl.wi v8, v10, 0
-; LMULMAX8-NEXT:    vse8.v v8, (a1)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX2-LABEL: trunc_v8i8_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vnsrl.wi v10, v8, 0
-; LMULMAX2-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; LMULMAX2-NEXT:    vnsrl.wi v8, v10, 0
-; LMULMAX2-NEXT:    vse8.v v8, (a1)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: trunc_v8i8_v8i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
-; LMULMAX1-NEXT:    vse8.v v8, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: trunc_v8i8_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vse8.v v8, (a1)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = trunc <8 x i32> %a to <8 x i8>
   store <8 x i8> %b, ptr %z
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
index df7a989859eebf..60202cfba760d8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
@@ -1,10 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV32
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV64
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define void @splat_v16i8(ptr %x, i8 %y) {
 ; CHECK-LABEL: splat_v16i8:
@@ -46,65 +42,25 @@ define void @splat_v4i32(ptr %x, i32 %y) {
 }
 
 define void @splat_v2i64(ptr %x, i64 %y) {
-; LMULMAX8-RV32-LABEL: splat_v2i64:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    addi sp, sp, -16
-; LMULMAX8-RV32-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX8-RV32-NEXT:    sw a2, 12(sp)
-; LMULMAX8-RV32-NEXT:    sw a1, 8(sp)
-; LMULMAX8-RV32-NEXT:    addi a1, sp, 8
-; LMULMAX8-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX8-RV32-NEXT:    vlse64.v v8, (a1), zero
-; LMULMAX8-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    addi sp, sp, 16
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX2-RV32-LABEL: splat_v2i64:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    addi sp, sp, -16
-; LMULMAX2-RV32-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX2-RV32-NEXT:    sw a2, 12(sp)
-; LMULMAX2-RV32-NEXT:    sw a1, 8(sp)
-; LMULMAX2-RV32-NEXT:    addi a1, sp, 8
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV32-NEXT:    vlse64.v v8, (a1), zero
-; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    addi sp, sp, 16
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: splat_v2i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    addi sp, sp, -16
-; LMULMAX1-RV32-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX1-RV32-NEXT:    sw a2, 12(sp)
-; LMULMAX1-RV32-NEXT:    sw a1, 8(sp)
-; LMULMAX1-RV32-NEXT:    addi a1, sp, 8
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vlse64.v v8, (a1), zero
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi sp, sp, 16
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: splat_v2i64:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX8-RV64-NEXT:    vmv.v.x v8, a1
-; LMULMAX8-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: splat_v2i64:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV64-NEXT:    vmv.v.x v8, a1
-; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: splat_v2i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.x v8, a1
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; RV32-LABEL: splat_v2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a2, 12(sp)
+; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v8, (a1), zero
+; RV32-NEXT:    vse64.v v8, (a0)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: splat_v2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    vse64.v v8, (a0)
+; RV64-NEXT:    ret
   %a = insertelement <2 x i64> poison, i64 %y, i32 0
   %b = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> zeroinitializer
   store <2 x i64> %b, ptr %x
@@ -112,30 +68,13 @@ define void @splat_v2i64(ptr %x, i64 %y) {
 }
 
 define void @splat_v32i8(ptr %x, i8 %y) {
-; LMULMAX8-LABEL: splat_v32i8:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a2, 32
-; LMULMAX8-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX8-NEXT:    vmv.v.x v8, a1
-; LMULMAX8-NEXT:    vse8.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX2-LABEL: splat_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.x v8, a1
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: splat_v32i8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmv.v.x v8, a1
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vse8.v v8, (a1)
-; LMULMAX1-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: splat_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vmv.v.x v8, a1
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <32 x i8> poison, i8 %y, i32 0
   %b = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> zeroinitializer
   store <32 x i8> %b, ptr %x
@@ -143,28 +82,12 @@ define void @splat_v32i8(ptr %x, i8 %y) {
 }
 
 define void @splat_v16i16(ptr %x, i16 %y) {
-; LMULMAX8-LABEL: splat_v16i16:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX8-NEXT:    vmv.v.x v8, a1
-; LMULMAX8-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX2-LABEL: splat_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.x v8, a1
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: splat_v16i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vmv.v.x v8, a1
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: splat_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vmv.v.x v8, a1
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <16 x i16> poison, i16 %y, i32 0
   %b = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> zeroinitializer
   store <16 x i16> %b, ptr %x
@@ -172,28 +95,12 @@ define void @splat_v16i16(ptr %x, i16 %y) {
 }
 
 define void @splat_v8i32(ptr %x, i32 %y) {
-; LMULMAX8-LABEL: splat_v8i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vmv.v.x v8, a1
-; LMULMAX8-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX2-LABEL: splat_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.x v8, a1
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: splat_v8i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vmv.v.x v8, a1
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: splat_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmv.v.x v8, a1
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <8 x i32> poison, i32 %y, i32 0
   %b = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> zeroinitializer
   store <8 x i32> %b, ptr %x
@@ -201,65 +108,25 @@ define void @splat_v8i32(ptr %x, i32 %y) {
 }
 
 define void @splat_v4i64(ptr %x, i64 %y) {
-; LMULMAX8-RV32-LABEL: splat_v4i64:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    addi sp, sp, -16
-; LMULMAX8-RV32-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX8-RV32-NEXT:    sw a2, 12(sp)
-; LMULMAX8-RV32-NEXT:    sw a1, 8(sp)
-; LMULMAX8-RV32-NEXT:    addi a1, sp, 8
-; LMULMAX8-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-RV32-NEXT:    vlse64.v v8, (a1), zero
-; LMULMAX8-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    addi sp, sp, 16
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX2-RV32-LABEL: splat_v4i64:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    addi sp, sp, -16
-; LMULMAX2-RV32-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX2-RV32-NEXT:    sw a2, 12(sp)
-; LMULMAX2-RV32-NEXT:    sw a1, 8(sp)
-; LMULMAX2-RV32-NEXT:    addi a1, sp, 8
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vlse64.v v8, (a1), zero
-; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    addi sp, sp, 16
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: splat_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v0, 5
-; LMULMAX1-RV32-NEXT:    vmv.v.x v8, a2
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v8, v8, a1, v0
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: splat_v4i64:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-RV64-NEXT:    vmv.v.x v8, a1
-; LMULMAX8-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: splat_v4i64:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vmv.v.x v8, a1
-; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: splat_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.x v8, a1
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; RV32-LABEL: splat_v4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a2, 12(sp)
+; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v8, (a1), zero
+; RV32-NEXT:    vse64.v v8, (a0)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: splat_v4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    vse64.v v8, (a0)
+; RV64-NEXT:    ret
   %a = insertelement <4 x i64> poison, i64 %y, i32 0
   %b = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> zeroinitializer
   store <4 x i64> %b, ptr %x
@@ -319,30 +186,13 @@ define void @splat_zero_v2i64(ptr %x) {
 }
 
 define void @splat_zero_v32i8(ptr %x) {
-; LMULMAX8-LABEL: splat_zero_v32i8:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a1, 32
-; LMULMAX8-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX8-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-NEXT:    vse8.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX2-LABEL: splat_zero_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a1, 32
-; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.i v8, 0
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: splat_zero_v32i8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: splat_zero_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <32 x i8> poison, i8 0, i32 0
   %b = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> zeroinitializer
   store <32 x i8> %b, ptr %x
@@ -350,28 +200,12 @@ define void @splat_zero_v32i8(ptr %x) {
 }
 
 define void @splat_zero_v16i16(ptr %x) {
-; LMULMAX8-LABEL: splat_zero_v16i16:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX8-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX2-LABEL: splat_zero_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.i v8, 0
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: splat_zero_v16i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: splat_zero_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <16 x i16> poison, i16 0, i32 0
   %b = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> zeroinitializer
   store <16 x i16> %b, ptr %x
@@ -379,28 +213,12 @@ define void @splat_zero_v16i16(ptr %x) {
 }
 
 define void @splat_zero_v8i32(ptr %x) {
-; LMULMAX8-LABEL: splat_zero_v8i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX2-LABEL: splat_zero_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.i v8, 0
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: splat_zero_v8i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: splat_zero_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <8 x i32> poison, i32 0, i32 0
   %b = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> zeroinitializer
   store <8 x i32> %b, ptr %x
@@ -408,37 +226,12 @@ define void @splat_zero_v8i32(ptr %x) {
 }
 
 define void @splat_zero_v4i64(ptr %x) {
-; LMULMAX8-LABEL: splat_zero_v4i64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-NEXT:    vse64.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX2-LABEL: splat_zero_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.i v8, 0
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: splat_zero_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a0, a0, 16
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: splat_zero_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 16
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: splat_zero_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <4 x i64> poison, i64 0, i32 0
   %b = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> zeroinitializer
   store <4 x i64> %b, ptr %x
@@ -467,81 +260,33 @@ define void @splat_zero_v2i16_unaligned(ptr %p) {
 }
 
 define void @splat_zero_v4i16(ptr %p) {
-; LMULMAX8-RV32-LABEL: splat_zero_v4i16:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX8-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX2-RV32-LABEL: splat_zero_v4i16:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: splat_zero_v4i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: splat_zero_v4i16:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    sd zero, 0(a0)
-; LMULMAX8-RV64-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: splat_zero_v4i16:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    sd zero, 0(a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: splat_zero_v4i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    sd zero, 0(a0)
-; LMULMAX1-RV64-NEXT:    ret
+; RV32-LABEL: splat_zero_v4i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT:    vmv.v.i v8, 0
+; RV32-NEXT:    vse16.v v8, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: splat_zero_v4i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sd zero, 0(a0)
+; RV64-NEXT:    ret
   store <4 x i16> zeroinitializer, ptr %p
   ret void
 }
 
 define void @splat_zero_v2i32(ptr %p) {
-; LMULMAX8-RV32-LABEL: splat_zero_v2i32:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX8-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX2-RV32-LABEL: splat_zero_v2i32:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX2-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: splat_zero_v2i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: splat_zero_v2i32:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    sd zero, 0(a0)
-; LMULMAX8-RV64-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: splat_zero_v2i32:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    sd zero, 0(a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: splat_zero_v2i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    sd zero, 0(a0)
-; LMULMAX1-RV64-NEXT:    ret
+; RV32-LABEL: splat_zero_v2i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT:    vmv.v.i v8, 0
+; RV32-NEXT:    vse32.v v8, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: splat_zero_v2i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sd zero, 0(a0)
+; RV64-NEXT:    ret
   store <2 x i32> zeroinitializer, ptr %p
   ret void
 }
@@ -612,30 +357,13 @@ define void @splat_allones_v2i64(ptr %x) {
 }
 
 define void @splat_allones_v32i8(ptr %x) {
-; LMULMAX8-LABEL: splat_allones_v32i8:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a1, 32
-; LMULMAX8-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX8-NEXT:    vmv.v.i v8, -1
-; LMULMAX8-NEXT:    vse8.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX2-LABEL: splat_allones_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a1, 32
-; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.i v8, -1
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: splat_allones_v32i8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, -1
-; LMULMAX1-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: splat_allones_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, -1
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <32 x i8> poison, i8 -1, i32 0
   %b = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> zeroinitializer
   store <32 x i8> %b, ptr %x
@@ -643,28 +371,12 @@ define void @splat_allones_v32i8(ptr %x) {
 }
 
 define void @splat_allones_v16i16(ptr %x) {
-; LMULMAX8-LABEL: splat_allones_v16i16:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX8-NEXT:    vmv.v.i v8, -1
-; LMULMAX8-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX2-LABEL: splat_allones_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.i v8, -1
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: splat_allones_v16i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, -1
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: splat_allones_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, -1
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <16 x i16> poison, i16 -1, i32 0
   %b = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> zeroinitializer
   store <16 x i16> %b, ptr %x
@@ -672,28 +384,12 @@ define void @splat_allones_v16i16(ptr %x) {
 }
 
 define void @splat_allones_v8i32(ptr %x) {
-; LMULMAX8-LABEL: splat_allones_v8i32:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX8-NEXT:    vmv.v.i v8, -1
-; LMULMAX8-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX2-LABEL: splat_allones_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.i v8, -1
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: splat_allones_v8i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v8, -1
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: splat_allones_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, -1
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <8 x i32> poison, i32 -1, i32 0
   %b = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> zeroinitializer
   store <8 x i32> %b, ptr %x
@@ -701,37 +397,12 @@ define void @splat_allones_v8i32(ptr %x) {
 }
 
 define void @splat_allones_v4i64(ptr %x) {
-; LMULMAX8-LABEL: splat_allones_v4i64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-NEXT:    vmv.v.i v8, -1
-; LMULMAX8-NEXT:    vse64.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX2-LABEL: splat_allones_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.i v8, -1
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: splat_allones_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v8, -1
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a0, a0, 16
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: splat_allones_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.i v8, -1
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 16
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: splat_allones_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, -1
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <4 x i64> poison, i64 -1, i32 0
   %b = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> zeroinitializer
   store <4 x i64> %b, ptr %x
@@ -743,48 +414,13 @@ define void @splat_allones_v4i64(ptr %x) {
 ; FIXME: We should prevent this and use the implicit sign extension of vmv.v.x
 ; with SEW=64 on RV32.
 define void @splat_allones_with_use_v4i64(ptr %x) {
-; LMULMAX8-LABEL: splat_allones_with_use_v4i64:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX8-NEXT:    vle64.v v8, (a0)
-; LMULMAX8-NEXT:    vadd.vi v8, v8, -1
-; LMULMAX8-NEXT:    vse64.v v8, (a0)
-; LMULMAX8-NEXT:    ret
-;
-; LMULMAX2-LABEL: splat_allones_with_use_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vadd.vi v8, v8, -1
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: splat_allones_with_use_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v10, -1
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: splat_allones_with_use_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vadd.vi v8, v8, -1
-; LMULMAX1-RV64-NEXT:    vadd.vi v9, v9, -1
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: splat_allones_with_use_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vadd.vi v8, v8, -1
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = add <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
   store <4 x i64> %b, ptr %x
@@ -796,171 +432,28 @@ define void @splat_allones_with_use_v4i64(ptr %x) {
 ; which exceeded maximum-expected size of 512. The scalable container type of
 ; nxv8i64 should have been used instead.
 define void @vadd_vx_v16i64(ptr %a, i64 %b, ptr %c) {
-; LMULMAX8-RV32-LABEL: vadd_vx_v16i64:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    addi sp, sp, -16
-; LMULMAX8-RV32-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX8-RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; LMULMAX8-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    sw a2, 12(sp)
-; LMULMAX8-RV32-NEXT:    sw a1, 8(sp)
-; LMULMAX8-RV32-NEXT:    addi a0, sp, 8
-; LMULMAX8-RV32-NEXT:    vlse64.v v16, (a0), zero
-; LMULMAX8-RV32-NEXT:    vadd.vv v8, v8, v16
-; LMULMAX8-RV32-NEXT:    vse64.v v8, (a3)
-; LMULMAX8-RV32-NEXT:    addi sp, sp, 16
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX2-RV32-LABEL: vadd_vx_v16i64:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    addi a4, a0, 64
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle64.v v8, (a4)
-; LMULMAX2-RV32-NEXT:    addi a4, a0, 96
-; LMULMAX2-RV32-NEXT:    vle64.v v10, (a4)
-; LMULMAX2-RV32-NEXT:    vle64.v v12, (a0)
-; LMULMAX2-RV32-NEXT:    addi a0, a0, 32
-; LMULMAX2-RV32-NEXT:    vle64.v v14, (a0)
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    li a0, 85
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a0
-; LMULMAX2-RV32-NEXT:    vmv.v.x v16, a2
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v16, v16, a1, v0
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vadd.vv v14, v14, v16
-; LMULMAX2-RV32-NEXT:    vadd.vv v12, v12, v16
-; LMULMAX2-RV32-NEXT:    vadd.vv v10, v10, v16
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v16
-; LMULMAX2-RV32-NEXT:    addi a0, a3, 64
-; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    addi a0, a3, 96
-; LMULMAX2-RV32-NEXT:    vse64.v v10, (a0)
-; LMULMAX2-RV32-NEXT:    vse64.v v12, (a3)
-; LMULMAX2-RV32-NEXT:    addi a0, a3, 32
-; LMULMAX2-RV32-NEXT:    vse64.v v14, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: vadd_vx_v16i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    addi a4, a0, 96
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a4)
-; LMULMAX1-RV32-NEXT:    addi a4, a0, 112
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a4)
-; LMULMAX1-RV32-NEXT:    addi a4, a0, 64
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a4)
-; LMULMAX1-RV32-NEXT:    addi a4, a0, 80
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a4)
-; LMULMAX1-RV32-NEXT:    addi a4, a0, 32
-; LMULMAX1-RV32-NEXT:    vle64.v v12, (a4)
-; LMULMAX1-RV32-NEXT:    addi a4, a0, 48
-; LMULMAX1-RV32-NEXT:    vle64.v v13, (a4)
-; LMULMAX1-RV32-NEXT:    vle64.v v14, (a0)
-; LMULMAX1-RV32-NEXT:    addi a0, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v15, (a0)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v0, 5
-; LMULMAX1-RV32-NEXT:    vmv.v.x v16, a2
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v16, v16, a1, v0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vadd.vv v15, v15, v16
-; LMULMAX1-RV32-NEXT:    vadd.vv v14, v14, v16
-; LMULMAX1-RV32-NEXT:    vadd.vv v13, v13, v16
-; LMULMAX1-RV32-NEXT:    vadd.vv v12, v12, v16
-; LMULMAX1-RV32-NEXT:    vadd.vv v11, v11, v16
-; LMULMAX1-RV32-NEXT:    vadd.vv v10, v10, v16
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v16
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v16
-; LMULMAX1-RV32-NEXT:    addi a0, a3, 96
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a0, a3, 112
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    addi a0, a3, 64
-; LMULMAX1-RV32-NEXT:    vse64.v v10, (a0)
-; LMULMAX1-RV32-NEXT:    addi a0, a3, 80
-; LMULMAX1-RV32-NEXT:    vse64.v v11, (a0)
-; LMULMAX1-RV32-NEXT:    addi a0, a3, 32
-; LMULMAX1-RV32-NEXT:    vse64.v v12, (a0)
-; LMULMAX1-RV32-NEXT:    addi a0, a3, 48
-; LMULMAX1-RV32-NEXT:    vse64.v v13, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v14, (a3)
-; LMULMAX1-RV32-NEXT:    addi a3, a3, 16
-; LMULMAX1-RV32-NEXT:    vse64.v v15, (a3)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: vadd_vx_v16i64:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; LMULMAX8-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    vadd.vx v8, v8, a1
-; LMULMAX8-RV64-NEXT:    vse64.v v8, (a2)
-; LMULMAX8-RV64-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: vadd_vx_v16i64:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    addi a3, a0, 96
-; LMULMAX2-RV64-NEXT:    vle64.v v8, (a3)
-; LMULMAX2-RV64-NEXT:    addi a3, a0, 32
-; LMULMAX2-RV64-NEXT:    vle64.v v10, (a3)
-; LMULMAX2-RV64-NEXT:    addi a3, a0, 64
-; LMULMAX2-RV64-NEXT:    vle64.v v12, (a3)
-; LMULMAX2-RV64-NEXT:    vle64.v v14, (a0)
-; LMULMAX2-RV64-NEXT:    vadd.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vadd.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vadd.vx v12, v12, a1
-; LMULMAX2-RV64-NEXT:    vadd.vx v14, v14, a1
-; LMULMAX2-RV64-NEXT:    vse64.v v14, (a2)
-; LMULMAX2-RV64-NEXT:    addi a0, a2, 64
-; LMULMAX2-RV64-NEXT:    vse64.v v12, (a0)
-; LMULMAX2-RV64-NEXT:    addi a0, a2, 96
-; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    addi a0, a2, 32
-; LMULMAX2-RV64-NEXT:    vse64.v v10, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: vadd_vx_v16i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a3, a0, 96
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a3)
-; LMULMAX1-RV64-NEXT:    addi a3, a0, 112
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV64-NEXT:    addi a3, a0, 64
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a3)
-; LMULMAX1-RV64-NEXT:    addi a3, a0, 48
-; LMULMAX1-RV64-NEXT:    vle64.v v12, (a3)
-; LMULMAX1-RV64-NEXT:    addi a3, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v13, (a3)
-; LMULMAX1-RV64-NEXT:    addi a3, a0, 80
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 32
-; LMULMAX1-RV64-NEXT:    vle64.v v14, (a0)
-; LMULMAX1-RV64-NEXT:    vle64.v v15, (a3)
-; LMULMAX1-RV64-NEXT:    vadd.vx v13, v13, a1
-; LMULMAX1-RV64-NEXT:    vadd.vx v12, v12, a1
-; LMULMAX1-RV64-NEXT:    vadd.vx v14, v14, a1
-; LMULMAX1-RV64-NEXT:    vadd.vx v15, v15, a1
-; LMULMAX1-RV64-NEXT:    vadd.vx v11, v11, a1
-; LMULMAX1-RV64-NEXT:    vadd.vx v10, v10, a1
-; LMULMAX1-RV64-NEXT:    vadd.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT:    vadd.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a2)
-; LMULMAX1-RV64-NEXT:    addi a0, a2, 96
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    addi a0, a2, 112
-; LMULMAX1-RV64-NEXT:    vse64.v v10, (a0)
-; LMULMAX1-RV64-NEXT:    addi a0, a2, 64
-; LMULMAX1-RV64-NEXT:    vse64.v v11, (a0)
-; LMULMAX1-RV64-NEXT:    addi a0, a2, 80
-; LMULMAX1-RV64-NEXT:    vse64.v v15, (a0)
-; LMULMAX1-RV64-NEXT:    addi a0, a2, 32
-; LMULMAX1-RV64-NEXT:    vse64.v v14, (a0)
-; LMULMAX1-RV64-NEXT:    addi a0, a2, 48
-; LMULMAX1-RV64-NEXT:    vse64.v v12, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a2, 16
-; LMULMAX1-RV64-NEXT:    vse64.v v13, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; RV32-LABEL: vadd_vx_v16i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    sw a2, 12(sp)
+; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    vse64.v v8, (a3)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vadd_vx_v16i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    vadd.vx v8, v8, a1
+; RV64-NEXT:    vse64.v v8, (a2)
+; RV64-NEXT:    ret
   %va = load <16 x i64>, ptr %a
   %head = insertelement <16 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <16 x i64> %head, <16 x i64> poison, <16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll
index 841e72f3afc353..2c0b1d09b52d93 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define void @gather_const_v16i8(ptr %x) {
 ; CHECK-LABEL: gather_const_v16i8:
@@ -69,27 +67,14 @@ define void @gather_const_v2i64(ptr %x) {
 }
 
 define void @gather_const_v64i8(ptr %x) {
-; LMULMAX4-LABEL: gather_const_v64i8:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    li a1, 64
-; LMULMAX4-NEXT:    addi a2, a0, 32
-; LMULMAX4-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; LMULMAX4-NEXT:    vlse8.v v8, (a2), zero
-; LMULMAX4-NEXT:    vse8.v v8, (a0)
-; LMULMAX4-NEXT:    ret
-;
-; LMULMAX1-LABEL: gather_const_v64i8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a1, a0, 32
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vlse8.v v8, (a1), zero
-; LMULMAX1-NEXT:    addi a2, a0, 16
-; LMULMAX1-NEXT:    addi a3, a0, 48
-; LMULMAX1-NEXT:    vse8.v v8, (a1)
-; LMULMAX1-NEXT:    vse8.v v8, (a3)
-; LMULMAX1-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-NEXT:    vse8.v v8, (a2)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: gather_const_v64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 64
+; CHECK-NEXT:    addi a2, a0, 32
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT:    vlse8.v v8, (a2), zero
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <64 x i8>, ptr %x
   %b = extractelement <64 x i8> %a, i32 32
   %c = insertelement <64 x i8> poison, i8 %b, i32 0
@@ -99,28 +84,14 @@ define void @gather_const_v64i8(ptr %x) {
 }
 
 define void @gather_const_v16i16(ptr %x) {
-; LMULMAX4-LABEL: gather_const_v16i16:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    li a1, 32
-; LMULMAX4-NEXT:    addi a2, a0, 50
-; LMULMAX4-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; LMULMAX4-NEXT:    vlse16.v v8, (a2), zero
-; LMULMAX4-NEXT:    vse16.v v8, (a0)
-; LMULMAX4-NEXT:    ret
-;
-; LMULMAX1-LABEL: gather_const_v16i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a1, a0, 50
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vlse16.v v8, (a1), zero
-; LMULMAX1-NEXT:    addi a1, a0, 48
-; LMULMAX1-NEXT:    addi a2, a0, 16
-; LMULMAX1-NEXT:    addi a3, a0, 32
-; LMULMAX1-NEXT:    vse16.v v8, (a3)
-; LMULMAX1-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    vse16.v v8, (a2)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: gather_const_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    addi a2, a0, 50
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a2), zero
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i16>, ptr %x
   %b = extractelement <32 x i16> %a, i32 25
   %c = insertelement <32 x i16> poison, i16 %b, i32 0
@@ -130,27 +101,13 @@ define void @gather_const_v16i16(ptr %x) {
 }
 
 define void @gather_const_v16i32(ptr %x) {
-; LMULMAX4-LABEL: gather_const_v16i32:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    addi a1, a0, 36
-; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT:    vlse32.v v8, (a1), zero
-; LMULMAX4-NEXT:    vse32.v v8, (a0)
-; LMULMAX4-NEXT:    ret
-;
-; LMULMAX1-LABEL: gather_const_v16i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a1, a0, 36
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vlse32.v v8, (a1), zero
-; LMULMAX1-NEXT:    addi a1, a0, 32
-; LMULMAX1-NEXT:    addi a2, a0, 16
-; LMULMAX1-NEXT:    addi a3, a0, 48
-; LMULMAX1-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-NEXT:    vse32.v v8, (a3)
-; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    vse32.v v8, (a2)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: gather_const_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, 36
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vlse32.v v8, (a1), zero
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i32>, ptr %x
   %b = extractelement <16 x i32> %a, i32 9
   %c = insertelement <16 x i32> poison, i32 %b, i32 0
@@ -160,27 +117,13 @@ define void @gather_const_v16i32(ptr %x) {
 }
 
 define void @gather_const_v8i64(ptr %x) {
-; LMULMAX4-LABEL: gather_const_v8i64:
-; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    addi a1, a0, 24
-; LMULMAX4-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; LMULMAX4-NEXT:    vlse64.v v8, (a1), zero
-; LMULMAX4-NEXT:    vse64.v v8, (a0)
-; LMULMAX4-NEXT:    ret
-;
-; LMULMAX1-LABEL: gather_const_v8i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a1, a0, 24
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vlse64.v v8, (a1), zero
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    addi a2, a0, 48
-; LMULMAX1-NEXT:    addi a3, a0, 32
-; LMULMAX1-NEXT:    vse64.v v8, (a3)
-; LMULMAX1-NEXT:    vse64.v v8, (a2)
-; LMULMAX1-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-NEXT:    vse64.v v8, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: gather_const_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, 24
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vlse64.v v8, (a1), zero
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i64>, ptr %x
   %b = extractelement <8 x i64> %a, i32 3
   %c = insertelement <8 x i64> poison, i64 %b, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 7a4620a5382584..175b110538ffba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2,LMULMAX2-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2,LMULMAX2-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define void @add_v16i8(ptr %x, ptr %y) {
 ; CHECK-LABEL: add_v16i8:
@@ -2377,45 +2375,15 @@ define void @umax_xv_v4i32(ptr %x, i32 %y) {
 }
 
 define void @add_v32i8(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: add_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vle8.v v10, (a1)
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: add_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: add_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: add_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
   %c = add <32 x i8> %a, %b
@@ -2424,44 +2392,14 @@ define void @add_v32i8(ptr %x, ptr %y) {
 }
 
 define void @add_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: add_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: add_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: add_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: add_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
   %c = add <16 x i16> %a, %b
@@ -2470,44 +2408,14 @@ define void @add_v16i16(ptr %x, ptr %y) {
 }
 
 define void @add_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: add_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: add_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: add_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: add_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = load <8 x i32>, ptr %y
   %c = add <8 x i32> %a, %b
@@ -2516,48 +2424,16 @@ define void @add_v8i32(ptr %x, ptr %y) {
 }
 
 define void @add_v6i32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: add_v6i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: add_v6i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV32-NEXT:    addi a1, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v10, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: add_v6i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: add_v6i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i32>, ptr %x
   %b = load <6 x i32>, ptr %y
   %c = add <6 x i32> %a, %b
@@ -2566,44 +2442,14 @@ define void @add_v6i32(ptr %x, ptr %y) {
 }
 
 define void @add_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: add_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: add_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: add_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: add_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vle64.v v10, (a1)
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = load <4 x i64>, ptr %y
   %c = add <4 x i64> %a, %b
@@ -2612,45 +2458,15 @@ define void @add_v4i64(ptr %x, ptr %y) {
 }
 
 define void @sub_v32i8(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: sub_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vle8.v v10, (a1)
-; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: sub_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: sub_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vsub.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: sub_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
   %c = sub <32 x i8> %a, %b
@@ -2659,44 +2475,14 @@ define void @sub_v32i8(ptr %x, ptr %y) {
 }
 
 define void @sub_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: sub_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: sub_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: sub_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vsub.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: sub_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
   %c = sub <16 x i16> %a, %b
@@ -2705,44 +2491,14 @@ define void @sub_v16i16(ptr %x, ptr %y) {
 }
 
 define void @sub_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: sub_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: sub_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: sub_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vsub.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: sub_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = load <8 x i32>, ptr %y
   %c = sub <8 x i32> %a, %b
@@ -2751,44 +2507,14 @@ define void @sub_v8i32(ptr %x, ptr %y) {
 }
 
 define void @sub_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: sub_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: sub_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: sub_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vsub.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: sub_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vle64.v v10, (a1)
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = load <4 x i64>, ptr %y
   %c = sub <4 x i64> %a, %b
@@ -2797,45 +2523,15 @@ define void @sub_v4i64(ptr %x, ptr %y) {
 }
 
 define void @mul_v32i8(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: mul_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vle8.v v10, (a1)
-; LMULMAX2-NEXT:    vmul.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: mul_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vmul.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: mul_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vmul.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vmul.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: mul_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vmul.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
   %c = mul <32 x i8> %a, %b
@@ -2844,44 +2540,14 @@ define void @mul_v32i8(ptr %x, ptr %y) {
 }
 
 define void @mul_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: mul_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vmul.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: mul_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vmul.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: mul_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vmul.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vmul.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: mul_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vmul.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
   %c = mul <16 x i16> %a, %b
@@ -2890,44 +2556,14 @@ define void @mul_v16i16(ptr %x, ptr %y) {
 }
 
 define void @mul_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: mul_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vmul.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: mul_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vmul.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: mul_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vmul.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vmul.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: mul_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vmul.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = load <8 x i32>, ptr %y
   %c = mul <8 x i32> %a, %b
@@ -2936,44 +2572,14 @@ define void @mul_v8i32(ptr %x, ptr %y) {
 }
 
 define void @mul_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: mul_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vmul.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: mul_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vmul.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: mul_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vmul.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vmul.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: mul_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vle64.v v10, (a1)
+; CHECK-NEXT:    vmul.vv v8, v8, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = load <4 x i64>, ptr %y
   %c = mul <4 x i64> %a, %b
@@ -2982,45 +2588,15 @@ define void @mul_v4i64(ptr %x, ptr %y) {
 }
 
 define void @and_v32i8(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: and_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vle8.v v10, (a1)
-; LMULMAX2-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: and_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: and_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vand.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: and_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vand.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
   %c = and <32 x i8> %a, %b
@@ -3029,44 +2605,14 @@ define void @and_v32i8(ptr %x, ptr %y) {
 }
 
 define void @and_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: and_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: and_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: and_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vand.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: and_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vand.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
   %c = and <16 x i16> %a, %b
@@ -3075,44 +2621,14 @@ define void @and_v16i16(ptr %x, ptr %y) {
 }
 
 define void @and_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: and_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: and_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: and_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vand.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: and_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vand.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = load <8 x i32>, ptr %y
   %c = and <8 x i32> %a, %b
@@ -3121,44 +2637,14 @@ define void @and_v8i32(ptr %x, ptr %y) {
 }
 
 define void @and_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: and_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: and_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: and_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vand.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: and_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vle64.v v10, (a1)
+; CHECK-NEXT:    vand.vv v8, v8, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = load <4 x i64>, ptr %y
   %c = and <4 x i64> %a, %b
@@ -3167,45 +2653,15 @@ define void @and_v4i64(ptr %x, ptr %y) {
 }
 
 define void @or_v32i8(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: or_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vle8.v v10, (a1)
-; LMULMAX2-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: or_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: or_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: or_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
   %c = or <32 x i8> %a, %b
@@ -3214,44 +2670,14 @@ define void @or_v32i8(ptr %x, ptr %y) {
 }
 
 define void @or_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: or_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: or_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: or_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: or_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
   %c = or <16 x i16> %a, %b
@@ -3260,44 +2686,14 @@ define void @or_v16i16(ptr %x, ptr %y) {
 }
 
 define void @or_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: or_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: or_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: or_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: or_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = load <8 x i32>, ptr %y
   %c = or <8 x i32> %a, %b
@@ -3306,44 +2702,14 @@ define void @or_v8i32(ptr %x, ptr %y) {
 }
 
 define void @or_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: or_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: or_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: or_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: or_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vle64.v v10, (a1)
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = load <4 x i64>, ptr %y
   %c = or <4 x i64> %a, %b
@@ -3352,45 +2718,15 @@ define void @or_v4i64(ptr %x, ptr %y) {
 }
 
 define void @xor_v32i8(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: xor_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vle8.v v10, (a1)
-; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: xor_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vxor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vxor.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: xor_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vxor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vxor.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: xor_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vxor.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
   %c = xor <32 x i8> %a, %b
@@ -3399,44 +2735,14 @@ define void @xor_v32i8(ptr %x, ptr %y) {
 }
 
 define void @xor_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: xor_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: xor_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vxor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vxor.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: xor_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vxor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vxor.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: xor_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vxor.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
   %c = xor <16 x i16> %a, %b
@@ -3445,44 +2751,14 @@ define void @xor_v16i16(ptr %x, ptr %y) {
 }
 
 define void @xor_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: xor_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: xor_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vxor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vxor.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: xor_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vxor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vxor.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: xor_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vxor.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = load <8 x i32>, ptr %y
   %c = xor <8 x i32> %a, %b
@@ -3491,44 +2767,14 @@ define void @xor_v8i32(ptr %x, ptr %y) {
 }
 
 define void @xor_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: xor_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: xor_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vxor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vxor.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: xor_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vxor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vxor.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: xor_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vle64.v v10, (a1)
+; CHECK-NEXT:    vxor.vv v8, v8, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = load <4 x i64>, ptr %y
   %c = xor <4 x i64> %a, %b
@@ -3537,45 +2783,15 @@ define void @xor_v4i64(ptr %x, ptr %y) {
 }
 
 define void @lshr_v32i8(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: lshr_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vle8.v v10, (a1)
-; LMULMAX2-NEXT:    vsrl.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: lshr_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vsrl.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: lshr_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vsrl.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: lshr_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vsrl.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
   %c = lshr <32 x i8> %a, %b
@@ -3584,44 +2800,14 @@ define void @lshr_v32i8(ptr %x, ptr %y) {
 }
 
 define void @lshr_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: lshr_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vsrl.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: lshr_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vsrl.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: lshr_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vsrl.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: lshr_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vsrl.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
   %c = lshr <16 x i16> %a, %b
@@ -3630,44 +2816,14 @@ define void @lshr_v16i16(ptr %x, ptr %y) {
 }
 
 define void @lshr_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: lshr_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vsrl.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: lshr_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vsrl.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: lshr_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vsrl.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: lshr_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vsrl.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = load <8 x i32>, ptr %y
   %c = lshr <8 x i32> %a, %b
@@ -3676,44 +2832,14 @@ define void @lshr_v8i32(ptr %x, ptr %y) {
 }
 
 define void @lshr_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: lshr_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vsrl.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: lshr_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vsrl.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: lshr_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vsrl.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: lshr_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vle64.v v10, (a1)
+; CHECK-NEXT:    vsrl.vv v8, v8, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = load <4 x i64>, ptr %y
   %c = lshr <4 x i64> %a, %b
@@ -3722,45 +2848,15 @@ define void @lshr_v4i64(ptr %x, ptr %y) {
 }
 
 define void @ashr_v32i8(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: ashr_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vle8.v v10, (a1)
-; LMULMAX2-NEXT:    vsra.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: ashr_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vsra.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsra.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: ashr_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vsra.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsra.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: ashr_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vsra.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
   %c = ashr <32 x i8> %a, %b
@@ -3769,44 +2865,14 @@ define void @ashr_v32i8(ptr %x, ptr %y) {
 }
 
 define void @ashr_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: ashr_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vsra.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: ashr_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vsra.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsra.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: ashr_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vsra.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsra.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: ashr_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vsra.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
   %c = ashr <16 x i16> %a, %b
@@ -3815,44 +2881,14 @@ define void @ashr_v16i16(ptr %x, ptr %y) {
 }
 
 define void @ashr_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: ashr_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vsra.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: ashr_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vsra.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsra.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: ashr_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vsra.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsra.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: ashr_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vsra.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = load <8 x i32>, ptr %y
   %c = ashr <8 x i32> %a, %b
@@ -3861,44 +2897,14 @@ define void @ashr_v8i32(ptr %x, ptr %y) {
 }
 
 define void @ashr_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: ashr_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vsra.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: ashr_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vsra.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsra.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: ashr_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vsra.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsra.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: ashr_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vle64.v v10, (a1)
+; CHECK-NEXT:    vsra.vv v8, v8, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = load <4 x i64>, ptr %y
   %c = ashr <4 x i64> %a, %b
@@ -3907,45 +2913,15 @@ define void @ashr_v4i64(ptr %x, ptr %y) {
 }
 
 define void @shl_v32i8(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: shl_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vle8.v v10, (a1)
-; LMULMAX2-NEXT:    vsll.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: shl_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vsll.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsll.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: shl_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vsll.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsll.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: shl_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vsll.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
   %c = shl <32 x i8> %a, %b
@@ -3954,44 +2930,14 @@ define void @shl_v32i8(ptr %x, ptr %y) {
 }
 
 define void @shl_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: shl_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vsll.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: shl_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vsll.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsll.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: shl_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vsll.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsll.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: shl_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vsll.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
   %c = shl <16 x i16> %a, %b
@@ -4000,44 +2946,14 @@ define void @shl_v16i16(ptr %x, ptr %y) {
 }
 
 define void @shl_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: shl_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vsll.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: shl_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vsll.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsll.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: shl_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vsll.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsll.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: shl_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vsll.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = load <8 x i32>, ptr %y
   %c = shl <8 x i32> %a, %b
@@ -4046,44 +2962,14 @@ define void @shl_v8i32(ptr %x, ptr %y) {
 }
 
 define void @shl_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: shl_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vsll.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: shl_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vsll.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsll.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: shl_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vsll.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsll.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: shl_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vle64.v v10, (a1)
+; CHECK-NEXT:    vsll.vv v8, v8, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = load <4 x i64>, ptr %y
   %c = shl <4 x i64> %a, %b
@@ -4092,45 +2978,15 @@ define void @shl_v4i64(ptr %x, ptr %y) {
 }
 
 define void @sdiv_v32i8(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: sdiv_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vle8.v v10, (a1)
-; LMULMAX2-NEXT:    vdiv.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: sdiv_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vdiv.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vdiv.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: sdiv_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vdiv.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vdiv.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: sdiv_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vdiv.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
   %c = sdiv <32 x i8> %a, %b
@@ -4139,44 +2995,14 @@ define void @sdiv_v32i8(ptr %x, ptr %y) {
 }
 
 define void @sdiv_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: sdiv_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vdiv.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: sdiv_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vdiv.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vdiv.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: sdiv_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vdiv.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vdiv.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: sdiv_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vdiv.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
   %c = sdiv <16 x i16> %a, %b
@@ -4185,44 +3011,14 @@ define void @sdiv_v16i16(ptr %x, ptr %y) {
 }
 
 define void @sdiv_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: sdiv_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vdiv.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: sdiv_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vdiv.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vdiv.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: sdiv_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vdiv.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vdiv.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: sdiv_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vdiv.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = load <8 x i32>, ptr %y
   %c = sdiv <8 x i32> %a, %b
@@ -4231,44 +3027,14 @@ define void @sdiv_v8i32(ptr %x, ptr %y) {
 }
 
 define void @sdiv_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: sdiv_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vdiv.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: sdiv_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vdiv.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vdiv.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: sdiv_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vdiv.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vdiv.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: sdiv_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vle64.v v10, (a1)
+; CHECK-NEXT:    vdiv.vv v8, v8, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = load <4 x i64>, ptr %y
   %c = sdiv <4 x i64> %a, %b
@@ -4277,45 +3043,15 @@ define void @sdiv_v4i64(ptr %x, ptr %y) {
 }
 
 define void @srem_v32i8(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: srem_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vle8.v v10, (a1)
-; LMULMAX2-NEXT:    vrem.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: srem_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vrem.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vrem.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: srem_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vrem.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vrem.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: srem_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vrem.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
   %c = srem <32 x i8> %a, %b
@@ -4324,44 +3060,14 @@ define void @srem_v32i8(ptr %x, ptr %y) {
 }
 
 define void @srem_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: srem_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vrem.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: srem_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vrem.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vrem.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: srem_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vrem.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vrem.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: srem_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vrem.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
   %c = srem <16 x i16> %a, %b
@@ -4370,44 +3076,14 @@ define void @srem_v16i16(ptr %x, ptr %y) {
 }
 
 define void @srem_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: srem_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vrem.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: srem_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vrem.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vrem.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: srem_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vrem.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vrem.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: srem_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vrem.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = load <8 x i32>, ptr %y
   %c = srem <8 x i32> %a, %b
@@ -4416,44 +3092,14 @@ define void @srem_v8i32(ptr %x, ptr %y) {
 }
 
 define void @srem_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: srem_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vrem.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: srem_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vrem.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vrem.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: srem_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vrem.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vrem.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: srem_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vle64.v v10, (a1)
+; CHECK-NEXT:    vrem.vv v8, v8, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = load <4 x i64>, ptr %y
   %c = srem <4 x i64> %a, %b
@@ -4462,45 +3108,15 @@ define void @srem_v4i64(ptr %x, ptr %y) {
 }
 
 define void @udiv_v32i8(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: udiv_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vle8.v v10, (a1)
-; LMULMAX2-NEXT:    vdivu.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: udiv_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vdivu.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vdivu.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: udiv_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vdivu.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vdivu.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: udiv_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vdivu.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
   %c = udiv <32 x i8> %a, %b
@@ -4509,44 +3125,14 @@ define void @udiv_v32i8(ptr %x, ptr %y) {
 }
 
 define void @udiv_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: udiv_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vdivu.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: udiv_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vdivu.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vdivu.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: udiv_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vdivu.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vdivu.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: udiv_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vdivu.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
   %c = udiv <16 x i16> %a, %b
@@ -4555,44 +3141,14 @@ define void @udiv_v16i16(ptr %x, ptr %y) {
 }
 
 define void @udiv_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: udiv_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vdivu.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: udiv_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vdivu.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vdivu.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: udiv_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vdivu.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vdivu.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: udiv_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vdivu.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = load <8 x i32>, ptr %y
   %c = udiv <8 x i32> %a, %b
@@ -4601,44 +3157,14 @@ define void @udiv_v8i32(ptr %x, ptr %y) {
 }
 
 define void @udiv_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: udiv_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vdivu.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: udiv_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vdivu.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vdivu.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: udiv_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vdivu.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vdivu.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: udiv_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vle64.v v10, (a1)
+; CHECK-NEXT:    vdivu.vv v8, v8, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = load <4 x i64>, ptr %y
   %c = udiv <4 x i64> %a, %b
@@ -4647,45 +3173,15 @@ define void @udiv_v4i64(ptr %x, ptr %y) {
 }
 
 define void @urem_v32i8(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: urem_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vle8.v v10, (a1)
-; LMULMAX2-NEXT:    vremu.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: urem_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vremu.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vremu.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: urem_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vremu.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vremu.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: urem_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vremu.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
   %c = urem <32 x i8> %a, %b
@@ -4694,44 +3190,14 @@ define void @urem_v32i8(ptr %x, ptr %y) {
 }
 
 define void @urem_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: urem_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vremu.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: urem_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vremu.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vremu.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: urem_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vremu.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vremu.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: urem_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vremu.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
   %c = urem <16 x i16> %a, %b
@@ -4740,44 +3206,14 @@ define void @urem_v16i16(ptr %x, ptr %y) {
 }
 
 define void @urem_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: urem_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vremu.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: urem_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vremu.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vremu.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: urem_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vremu.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vremu.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: urem_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vremu.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = load <8 x i32>, ptr %y
   %c = urem <8 x i32> %a, %b
@@ -4786,44 +3222,14 @@ define void @urem_v8i32(ptr %x, ptr %y) {
 }
 
 define void @urem_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: urem_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vremu.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: urem_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vremu.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vremu.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: urem_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vremu.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vremu.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: urem_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vle64.v v10, (a1)
+; CHECK-NEXT:    vremu.vv v8, v8, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = load <4 x i64>, ptr %y
   %c = urem <4 x i64> %a, %b
@@ -4832,29 +3238,14 @@ define void @urem_v4i64(ptr %x, ptr %y) {
 }
 
 define void @extract_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: extract_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: extract_v4i64:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-NEXT:    addi a2, a0, 16
-; LMULMAX1-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-NEXT:    vle64.v v10, (a1)
-; LMULMAX1-NEXT:    addi a1, a1, 16
-; LMULMAX1-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-NEXT:    vadd.vv v9, v9, v11
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: extract_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vle64.v v10, (a1)
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = load <4 x i64>, ptr %y
   br label %"compute"
@@ -4865,70 +3256,55 @@ define void @extract_v4i64(ptr %x, ptr %y) {
 }
 
 define void @mulhu_v32i8(ptr %x) {
-; LMULMAX2-LABEL: mulhu_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a1, 32
-; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vmv.v.i v10, 0
-; LMULMAX2-NEXT:    lui a1, 163907
-; LMULMAX2-NEXT:    addi a1, a1, -2044
-; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; LMULMAX2-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-NEXT:    li a1, -128
-; LMULMAX2-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vmerge.vxm v12, v10, a1, v0
-; LMULMAX2-NEXT:    lui a1, 66049
-; LMULMAX2-NEXT:    addi a1, a1, 32
-; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; LMULMAX2-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; LMULMAX2-NEXT:    lui a1, %hi(.LCPI181_0)
-; LMULMAX2-NEXT:    addi a1, a1, %lo(.LCPI181_0)
-; LMULMAX2-NEXT:    vle8.v v14, (a1)
-; LMULMAX2-NEXT:    vmerge.vim v10, v10, 1, v0
-; LMULMAX2-NEXT:    vsrl.vv v10, v8, v10
-; LMULMAX2-NEXT:    vmulhu.vv v10, v10, v14
-; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    vmulhu.vv v8, v8, v12
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    vmv.v.i v10, 4
-; LMULMAX2-NEXT:    lui a1, 8208
-; LMULMAX2-NEXT:    addi a1, a1, 513
-; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; LMULMAX2-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vmerge.vim v10, v10, 1, v0
-; LMULMAX2-NEXT:    lui a1, 66785
-; LMULMAX2-NEXT:    addi a1, a1, 78
-; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; LMULMAX2-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vmerge.vim v10, v10, 3, v0
-; LMULMAX2-NEXT:    lui a1, 529160
-; LMULMAX2-NEXT:    addi a1, a1, 304
-; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; LMULMAX2-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vmerge.vim v10, v10, 2, v0
-; LMULMAX2-NEXT:    vsrl.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: mulhu_v32i8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle8.v v8, (a1)
-; LMULMAX1-NEXT:    lui a2, %hi(.LCPI181_0)
-; LMULMAX1-NEXT:    addi a2, a2, %lo(.LCPI181_0)
-; LMULMAX1-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-NEXT:    vle8.v v10, (a0)
-; LMULMAX1-NEXT:    vdivu.vv v8, v8, v9
-; LMULMAX1-NEXT:    vdivu.vv v9, v10, v9
-; LMULMAX1-NEXT:    vse8.v v9, (a0)
-; LMULMAX1-NEXT:    vse8.v v8, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: mulhu_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vmv.v.i v10, 0
+; CHECK-NEXT:    lui a1, 163907
+; CHECK-NEXT:    addi a1, a1, -2044
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    li a1, -128
+; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT:    vmerge.vxm v12, v10, a1, v0
+; CHECK-NEXT:    lui a1, 66049
+; CHECK-NEXT:    addi a1, a1, 32
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT:    lui a1, %hi(.LCPI181_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI181_0)
+; CHECK-NEXT:    vle8.v v14, (a1)
+; CHECK-NEXT:    vmerge.vim v10, v10, 1, v0
+; CHECK-NEXT:    vsrl.vv v10, v8, v10
+; CHECK-NEXT:    vmulhu.vv v10, v10, v14
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    vmulhu.vv v8, v8, v12
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    vmv.v.i v10, 4
+; CHECK-NEXT:    lui a1, 8208
+; CHECK-NEXT:    addi a1, a1, 513
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT:    vmerge.vim v10, v10, 1, v0
+; CHECK-NEXT:    lui a1, 66785
+; CHECK-NEXT:    addi a1, a1, 78
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT:    vmerge.vim v10, v10, 3, v0
+; CHECK-NEXT:    lui a1, 529160
+; CHECK-NEXT:    addi a1, a1, 304
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT:    vmerge.vim v10, v10, 2, v0
+; CHECK-NEXT:    vsrl.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = udiv <32 x i8> %a, <i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25>
   store <32 x i8> %b, ptr %x
@@ -4936,93 +3312,78 @@ define void @mulhu_v32i8(ptr %x) {
 }
 
 define void @mulhu_v16i16(ptr %x) {
-; LMULMAX2-RV32-LABEL: mulhu_v16i16:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle16.v v10, (a0)
-; LMULMAX2-RV32-NEXT:    li a1, 257
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX2-RV32-NEXT:    lui a1, 1048568
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v12, v8, a1, v0
-; LMULMAX2-RV32-NEXT:    lui a1, 4
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 64
-; LMULMAX2-RV32-NEXT:    vmv.s.x v8, a1
-; LMULMAX2-RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.i v9, 0
-; LMULMAX2-RV32-NEXT:    vmv1r.v v0, v8
-; LMULMAX2-RV32-NEXT:    vmerge.vim v9, v9, 1, v0
-; LMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    lui a1, %hi(.LCPI182_0)
-; LMULMAX2-RV32-NEXT:    addi a1, a1, %lo(.LCPI182_0)
-; LMULMAX2-RV32-NEXT:    vle16.v v14, (a1)
-; LMULMAX2-RV32-NEXT:    vsext.vf2 v16, v9
-; LMULMAX2-RV32-NEXT:    vsrl.vv v16, v10, v16
-; LMULMAX2-RV32-NEXT:    vmulhu.vv v14, v16, v14
-; LMULMAX2-RV32-NEXT:    vsub.vv v10, v10, v14
-; LMULMAX2-RV32-NEXT:    vmulhu.vv v10, v10, v12
-; LMULMAX2-RV32-NEXT:    vadd.vv v10, v10, v14
-; LMULMAX2-RV32-NEXT:    lui a1, 2
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 289
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.i v9, 3
-; LMULMAX2-RV32-NEXT:    vmerge.vim v9, v9, 2, v0
-; LMULMAX2-RV32-NEXT:    vmv1r.v v0, v8
-; LMULMAX2-RV32-NEXT:    vmerge.vim v8, v9, 1, v0
-; LMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vsext.vf2 v12, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vv v8, v10, v12
-; LMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: mulhu_v16i16:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    li a1, 257
-; LMULMAX2-RV64-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-RV64-NEXT:    vmv.v.i v10, 0
-; LMULMAX2-RV64-NEXT:    lui a1, 1048568
-; LMULMAX2-RV64-NEXT:    vmerge.vxm v10, v10, a1, v0
-; LMULMAX2-RV64-NEXT:    li a1, 1
-; LMULMAX2-RV64-NEXT:    slli a1, a1, 48
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV64-NEXT:    vmv.v.x v12, a1
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    lui a1, %hi(.LCPI182_0)
-; LMULMAX2-RV64-NEXT:    addi a1, a1, %lo(.LCPI182_0)
-; LMULMAX2-RV64-NEXT:    vle16.v v14, (a1)
-; LMULMAX2-RV64-NEXT:    vsext.vf2 v16, v12
-; LMULMAX2-RV64-NEXT:    vsrl.vv v12, v8, v16
-; LMULMAX2-RV64-NEXT:    vmulhu.vv v12, v12, v14
-; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v12
-; LMULMAX2-RV64-NEXT:    vmulhu.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v12
-; LMULMAX2-RV64-NEXT:    lui a1, %hi(.LCPI182_1)
-; LMULMAX2-RV64-NEXT:    addi a1, a1, %lo(.LCPI182_1)
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX2-RV64-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vsext.vf2 v12, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vv v8, v8, v12
-; LMULMAX2-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
+; RV32-LABEL: mulhu_v16i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RV32-NEXT:    vle16.v v10, (a0)
+; RV32-NEXT:    li a1, 257
+; RV32-NEXT:    vmv.s.x v0, a1
+; RV32-NEXT:    vmv.v.i v8, 0
+; RV32-NEXT:    lui a1, 1048568
+; RV32-NEXT:    vmerge.vxm v12, v8, a1, v0
+; RV32-NEXT:    lui a1, 4
+; RV32-NEXT:    addi a1, a1, 64
+; RV32-NEXT:    vmv.s.x v8, a1
+; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; RV32-NEXT:    vmv.v.i v9, 0
+; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    vmerge.vim v9, v9, 1, v0
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    lui a1, %hi(.LCPI182_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI182_0)
+; RV32-NEXT:    vle16.v v14, (a1)
+; RV32-NEXT:    vsext.vf2 v16, v9
+; RV32-NEXT:    vsrl.vv v16, v10, v16
+; RV32-NEXT:    vmulhu.vv v14, v16, v14
+; RV32-NEXT:    vsub.vv v10, v10, v14
+; RV32-NEXT:    vmulhu.vv v10, v10, v12
+; RV32-NEXT:    vadd.vv v10, v10, v14
+; RV32-NEXT:    lui a1, 2
+; RV32-NEXT:    addi a1, a1, 289
+; RV32-NEXT:    vmv.s.x v0, a1
+; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; RV32-NEXT:    vmv.v.i v9, 3
+; RV32-NEXT:    vmerge.vim v9, v9, 2, v0
+; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    vmerge.vim v8, v9, 1, v0
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsext.vf2 v12, v8
+; RV32-NEXT:    vsrl.vv v8, v10, v12
+; RV32-NEXT:    vse16.v v8, (a0)
+; RV32-NEXT:    ret
 ;
-; LMULMAX1-LABEL: mulhu_v16i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-NEXT:    lui a2, %hi(.LCPI182_0)
-; LMULMAX1-NEXT:    addi a2, a2, %lo(.LCPI182_0)
-; LMULMAX1-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-NEXT:    vle16.v v10, (a0)
-; LMULMAX1-NEXT:    vdivu.vv v8, v8, v9
-; LMULMAX1-NEXT:    vdivu.vv v9, v10, v9
-; LMULMAX1-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-NEXT:    vse16.v v8, (a1)
-; LMULMAX1-NEXT:    ret
+; RV64-LABEL: mulhu_v16i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RV64-NEXT:    vle16.v v8, (a0)
+; RV64-NEXT:    li a1, 257
+; RV64-NEXT:    vmv.s.x v0, a1
+; RV64-NEXT:    vmv.v.i v10, 0
+; RV64-NEXT:    lui a1, 1048568
+; RV64-NEXT:    vmerge.vxm v10, v10, a1, v0
+; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    slli a1, a1, 48
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.x v12, a1
+; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RV64-NEXT:    lui a1, %hi(.LCPI182_0)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI182_0)
+; RV64-NEXT:    vle16.v v14, (a1)
+; RV64-NEXT:    vsext.vf2 v16, v12
+; RV64-NEXT:    vsrl.vv v12, v8, v16
+; RV64-NEXT:    vmulhu.vv v12, v12, v14
+; RV64-NEXT:    vsub.vv v8, v8, v12
+; RV64-NEXT:    vmulhu.vv v8, v8, v10
+; RV64-NEXT:    vadd.vv v8, v8, v12
+; RV64-NEXT:    lui a1, %hi(.LCPI182_1)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI182_1)
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vlse64.v v10, (a1), zero
+; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RV64-NEXT:    vsext.vf2 v12, v10
+; RV64-NEXT:    vsrl.vv v8, v8, v12
+; RV64-NEXT:    vse16.v v8, (a0)
+; RV64-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = udiv <16 x i16> %a, <i16 7, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 7, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
   store <16 x i16> %b, ptr %x
@@ -5030,80 +3391,31 @@ define void @mulhu_v16i16(ptr %x) {
 }
 
 define void @mulhu_v8i32(ptr %x) {
-; LMULMAX2-LABEL: mulhu_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    li a1, 68
-; LMULMAX2-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-NEXT:    lui a1, %hi(.LCPI183_0)
-; LMULMAX2-NEXT:    addi a1, a1, %lo(.LCPI183_0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vmv.v.i v12, 0
-; LMULMAX2-NEXT:    lui a1, 524288
-; LMULMAX2-NEXT:    vmerge.vxm v12, v12, a1, v0
-; LMULMAX2-NEXT:    vmulhu.vv v10, v8, v10
-; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-NEXT:    vmulhu.vv v8, v8, v12
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    lui a1, 4128
-; LMULMAX2-NEXT:    addi a1, a1, 514
-; LMULMAX2-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vsext.vf4 v12, v10
-; LMULMAX2-NEXT:    vsrl.vv v8, v8, v12
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: mulhu_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    lui a2, 524288
-; LMULMAX1-RV32-NEXT:    vmv.s.x v10, a2
-; LMULMAX1-RV32-NEXT:    vmv.v.i v11, 0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
-; LMULMAX1-RV32-NEXT:    vslideup.vi v11, v10, 2
-; LMULMAX1-RV32-NEXT:    lui a2, %hi(.LCPI183_0)
-; LMULMAX1-RV32-NEXT:    addi a2, a2, %lo(.LCPI183_0)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV32-NEXT:    vmulhu.vv v12, v9, v10
-; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v12
-; LMULMAX1-RV32-NEXT:    vmulhu.vv v9, v9, v11
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v12
-; LMULMAX1-RV32-NEXT:    lui a2, 4128
-; LMULMAX1-RV32-NEXT:    addi a2, a2, 514
-; LMULMAX1-RV32-NEXT:    vmv.s.x v12, a2
-; LMULMAX1-RV32-NEXT:    vsext.vf4 v13, v12
-; LMULMAX1-RV32-NEXT:    vsrl.vv v9, v9, v13
-; LMULMAX1-RV32-NEXT:    vmulhu.vv v10, v8, v10
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vmulhu.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vv v8, v8, v13
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: mulhu_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    lui a2, 36976
-; LMULMAX1-RV64-NEXT:    addi a2, a2, 1541
-; LMULMAX1-RV64-NEXT:    vmv.s.x v10, a2
-; LMULMAX1-RV64-NEXT:    vsext.vf4 v11, v10
-; LMULMAX1-RV64-NEXT:    vdivu.vv v9, v9, v11
-; LMULMAX1-RV64-NEXT:    vdivu.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: mulhu_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    li a1, 68
+; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    lui a1, %hi(.LCPI183_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI183_0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vmv.v.i v12, 0
+; CHECK-NEXT:    lui a1, 524288
+; CHECK-NEXT:    vmerge.vxm v12, v12, a1, v0
+; CHECK-NEXT:    vmulhu.vv v10, v8, v10
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    vmulhu.vv v8, v8, v12
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    lui a1, 4128
+; CHECK-NEXT:    addi a1, a1, 514
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a1
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v10
+; CHECK-NEXT:    vsrl.vv v8, v8, v12
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = udiv <8 x i32> %a, <i32 5, i32 6, i32 7, i32 9, i32 5, i32 6, i32 7, i32 9>
   store <8 x i32> %b, ptr %x
@@ -5111,131 +3423,61 @@ define void @mulhu_v8i32(ptr %x) {
 }
 
 define void @mulhu_v4i64(ptr %x) {
-; LMULMAX2-RV32-LABEL: mulhu_v4i64:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    lui a1, %hi(.LCPI184_0)
-; LMULMAX2-RV32-NEXT:    addi a1, a1, %lo(.LCPI184_0)
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmulhu.vv v10, v8, v10
-; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    lui a1, 524288
-; LMULMAX2-RV32-NEXT:    vmv.s.x v12, a1
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.i v14, 0
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
-; LMULMAX2-RV32-NEXT:    vslideup.vi v14, v12, 5
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmulhu.vv v8, v8, v14
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    lui a1, %hi(.LCPI184_1)
-; LMULMAX2-RV32-NEXT:    addi a1, a1, %lo(.LCPI184_1)
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle8.v v10, (a1)
-; LMULMAX2-RV32-NEXT:    vsext.vf4 v12, v10
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vsrl.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: mulhu_v4i64:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    li a1, -1
-; LMULMAX2-RV64-NEXT:    slli a1, a1, 63
-; LMULMAX2-RV64-NEXT:    vmv.s.x v10, a1
-; LMULMAX2-RV64-NEXT:    vmv.v.i v12, 0
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
-; LMULMAX2-RV64-NEXT:    vslideup.vi v12, v10, 2
-; LMULMAX2-RV64-NEXT:    lui a1, %hi(.LCPI184_0)
-; LMULMAX2-RV64-NEXT:    addi a1, a1, %lo(.LCPI184_0)
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-RV64-NEXT:    vmulhu.vv v10, v8, v10
-; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vmulhu.vv v8, v8, v12
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    lui a1, 12320
-; LMULMAX2-RV64-NEXT:    addi a1, a1, 513
-; LMULMAX2-RV64-NEXT:    vmv.s.x v10, a1
-; LMULMAX2-RV64-NEXT:    vsext.vf8 v12, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vv v8, v8, v12
-; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: mulhu_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    lui a2, 144
-; LMULMAX1-RV32-NEXT:    addi a2, a2, 7
-; LMULMAX1-RV32-NEXT:    vmv.s.x v10, a2
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vsext.vf4 v11, v10
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vdivu.vv v9, v9, v11
-; LMULMAX1-RV32-NEXT:    lui a2, 80
-; LMULMAX1-RV32-NEXT:    addi a2, a2, 3
-; LMULMAX1-RV32-NEXT:    vmv.s.x v10, a2
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vsext.vf4 v11, v10
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vdivu.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    ret
+; RV32-LABEL: mulhu_v4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    lui a1, %hi(.LCPI184_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI184_0)
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    vle32.v v10, (a1)
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vmulhu.vv v10, v8, v10
+; RV32-NEXT:    vsub.vv v8, v8, v10
+; RV32-NEXT:    lui a1, 524288
+; RV32-NEXT:    vmv.s.x v12, a1
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.i v14, 0
+; RV32-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
+; RV32-NEXT:    vslideup.vi v14, v12, 5
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vmulhu.vv v8, v8, v14
+; RV32-NEXT:    vadd.vv v8, v8, v10
+; RV32-NEXT:    lui a1, %hi(.LCPI184_1)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI184_1)
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    vle8.v v10, (a1)
+; RV32-NEXT:    vsext.vf4 v12, v10
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vsrl.vv v8, v8, v12
+; RV32-NEXT:    vse64.v v8, (a0)
+; RV32-NEXT:    ret
 ;
-; LMULMAX1-RV64-LABEL: mulhu_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    vmv.v.i v10, 0
-; LMULMAX1-RV64-NEXT:    li a2, -1
-; LMULMAX1-RV64-NEXT:    slli a2, a2, 63
-; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
-; LMULMAX1-RV64-NEXT:    vmv.s.x v10, a2
-; LMULMAX1-RV64-NEXT:    lui a2, %hi(.LCPI184_0)
-; LMULMAX1-RV64-NEXT:    addi a2, a2, %lo(.LCPI184_0)
-; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vlse64.v v11, (a2), zero
-; LMULMAX1-RV64-NEXT:    lui a2, %hi(.LCPI184_1)
-; LMULMAX1-RV64-NEXT:    ld a2, %lo(.LCPI184_1)(a2)
-; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
-; LMULMAX1-RV64-NEXT:    vmv.s.x v11, a2
-; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vmulhu.vv v11, v9, v11
-; LMULMAX1-RV64-NEXT:    vsub.vv v9, v9, v11
-; LMULMAX1-RV64-NEXT:    vmulhu.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v11
-; LMULMAX1-RV64-NEXT:    vid.v v10
-; LMULMAX1-RV64-NEXT:    vadd.vi v11, v10, 2
-; LMULMAX1-RV64-NEXT:    vsrl.vv v9, v9, v11
-; LMULMAX1-RV64-NEXT:    lui a2, 838861
-; LMULMAX1-RV64-NEXT:    addiw a2, a2, -819
-; LMULMAX1-RV64-NEXT:    slli a3, a2, 32
-; LMULMAX1-RV64-NEXT:    add a2, a2, a3
-; LMULMAX1-RV64-NEXT:    vmv.v.x v11, a2
-; LMULMAX1-RV64-NEXT:    lui a2, 699051
-; LMULMAX1-RV64-NEXT:    addiw a2, a2, -1365
-; LMULMAX1-RV64-NEXT:    slli a3, a2, 32
-; LMULMAX1-RV64-NEXT:    add a2, a2, a3
-; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
-; LMULMAX1-RV64-NEXT:    vmv.s.x v11, a2
-; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vmulhu.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vadd.vi v10, v10, 1
-; LMULMAX1-RV64-NEXT:    vsrl.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; RV64-LABEL: mulhu_v4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    li a1, -1
+; RV64-NEXT:    slli a1, a1, 63
+; RV64-NEXT:    vmv.s.x v10, a1
+; RV64-NEXT:    vmv.v.i v12, 0
+; RV64-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
+; RV64-NEXT:    vslideup.vi v12, v10, 2
+; RV64-NEXT:    lui a1, %hi(.LCPI184_0)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI184_0)
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vle64.v v10, (a1)
+; RV64-NEXT:    vmulhu.vv v10, v8, v10
+; RV64-NEXT:    vsub.vv v8, v8, v10
+; RV64-NEXT:    vmulhu.vv v8, v8, v12
+; RV64-NEXT:    vadd.vv v8, v8, v10
+; RV64-NEXT:    lui a1, 12320
+; RV64-NEXT:    addi a1, a1, 513
+; RV64-NEXT:    vmv.s.x v10, a1
+; RV64-NEXT:    vsext.vf8 v12, v10
+; RV64-NEXT:    vsrl.vv v8, v8, v12
+; RV64-NEXT:    vse64.v v8, (a0)
+; RV64-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = udiv <4 x i64> %a, <i64 3, i64 5, i64 7, i64 9>
   store <4 x i64> %b, ptr %x
@@ -5243,44 +3485,26 @@ define void @mulhu_v4i64(ptr %x) {
 }
 
 define void @mulhs_v32i8(ptr %x) {
-; LMULMAX2-LABEL: mulhs_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a1, 32
-; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vmv.v.i v10, 7
-; LMULMAX2-NEXT:    lui a1, 304453
-; LMULMAX2-NEXT:    addi a1, a1, -1452
-; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; LMULMAX2-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vmerge.vim v10, v10, 1, v0
-; LMULMAX2-NEXT:    li a1, -123
-; LMULMAX2-NEXT:    vmv.v.x v12, a1
-; LMULMAX2-NEXT:    li a1, 57
-; LMULMAX2-NEXT:    vmerge.vxm v12, v12, a1, v0
-; LMULMAX2-NEXT:    vmulhu.vv v8, v8, v12
-; LMULMAX2-NEXT:    vsrl.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: mulhs_v32i8:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX1-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle8.v v9, (a1)
-; LMULMAX1-NEXT:    lui a2, 5
-; LMULMAX1-NEXT:    addi a2, a2, -1452
-; LMULMAX1-NEXT:    vmv.s.x v0, a2
-; LMULMAX1-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmv.v.i v10, -9
-; LMULMAX1-NEXT:    vmerge.vim v10, v10, 9, v0
-; LMULMAX1-NEXT:    vdivu.vv v9, v9, v10
-; LMULMAX1-NEXT:    vdivu.vv v8, v8, v10
-; LMULMAX1-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-NEXT:    vse8.v v9, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: mulhs_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vmv.v.i v10, 7
+; CHECK-NEXT:    lui a1, 304453
+; CHECK-NEXT:    addi a1, a1, -1452
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT:    vmerge.vim v10, v10, 1, v0
+; CHECK-NEXT:    li a1, -123
+; CHECK-NEXT:    vmv.v.x v12, a1
+; CHECK-NEXT:    li a1, 57
+; CHECK-NEXT:    vmerge.vxm v12, v12, a1, v0
+; CHECK-NEXT:    vmulhu.vv v8, v8, v12
+; CHECK-NEXT:    vsrl.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = udiv <32 x i8> %a, <i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9>
   store <32 x i8> %b, ptr %x
@@ -5288,41 +3512,25 @@ define void @mulhs_v32i8(ptr %x) {
 }
 
 define void @mulhs_v16i16(ptr %x) {
-; LMULMAX2-LABEL: mulhs_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    lui a1, 5
-; LMULMAX2-NEXT:    addi a1, a1, -1755
-; LMULMAX2-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-NEXT:    lui a1, 7
-; LMULMAX2-NEXT:    addi a1, a1, -1687
-; LMULMAX2-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-NEXT:    lui a1, 1048571
-; LMULMAX2-NEXT:    addi a1, a1, 1755
-; LMULMAX2-NEXT:    vmerge.vxm v10, v10, a1, v0
-; LMULMAX2-NEXT:    vmulh.vv v8, v8, v10
-; LMULMAX2-NEXT:    vsra.vi v8, v8, 1
-; LMULMAX2-NEXT:    vsrl.vi v10, v8, 15
-; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: mulhs_v16i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-NEXT:    li a2, 105
-; LMULMAX1-NEXT:    vmv.s.x v0, a2
-; LMULMAX1-NEXT:    vmv.v.i v10, 7
-; LMULMAX1-NEXT:    vmerge.vim v10, v10, -7, v0
-; LMULMAX1-NEXT:    vdiv.vv v9, v9, v10
-; LMULMAX1-NEXT:    vdiv.vv v8, v8, v10
-; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    vse16.v v9, (a1)
-; LMULMAX1-NEXT:    ret
+; CHECK-LABEL: mulhs_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    lui a1, 5
+; CHECK-NEXT:    addi a1, a1, -1755
+; CHECK-NEXT:    vmv.v.x v10, a1
+; CHECK-NEXT:    lui a1, 7
+; CHECK-NEXT:    addi a1, a1, -1687
+; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    lui a1, 1048571
+; CHECK-NEXT:    addi a1, a1, 1755
+; CHECK-NEXT:    vmerge.vxm v10, v10, a1, v0
+; CHECK-NEXT:    vmulh.vv v8, v8, v10
+; CHECK-NEXT:    vsra.vi v8, v8, 1
+; CHECK-NEXT:    vsrl.vi v10, v8, 15
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = sdiv <16 x i16> %a, <i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7, i16 -7, i16 7, i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7, i16 -7, i16 7>
   store <16 x i16> %b, ptr %x
@@ -5330,83 +3538,40 @@ define void @mulhs_v16i16(ptr %x) {
 }
 
 define void @mulhs_v8i32(ptr %x) {
-; LMULMAX2-RV32-LABEL: mulhs_v8i32:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    lui a1, 419430
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 1639
-; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32-NEXT:    li a1, 85
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-RV32-NEXT:    lui a1, 629146
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -1639
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v10, v10, a1, v0
-; LMULMAX2-RV32-NEXT:    vmulh.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 31
-; LMULMAX2-RV32-NEXT:    vsra.vi v8, v8, 1
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: mulhs_v8i32:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    lui a1, %hi(.LCPI187_0)
-; LMULMAX2-RV64-NEXT:    addi a1, a1, %lo(.LCPI187_0)
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vlse64.v v10, (a1), zero
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vmulh.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vsra.vi v8, v8, 1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 31
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: mulhs_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    lui a2, 419430
-; LMULMAX1-RV32-NEXT:    addi a2, a2, 1639
-; LMULMAX1-RV32-NEXT:    vmv.v.x v10, a2
-; LMULMAX1-RV32-NEXT:    vmv.v.i v0, 5
-; LMULMAX1-RV32-NEXT:    lui a2, 629146
-; LMULMAX1-RV32-NEXT:    addi a2, a2, -1639
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v10, v10, a2, v0
-; LMULMAX1-RV32-NEXT:    vmulh.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v11, v9, 31
-; LMULMAX1-RV32-NEXT:    vsra.vi v9, v9, 1
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v11
-; LMULMAX1-RV32-NEXT:    vmulh.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 31
-; LMULMAX1-RV32-NEXT:    vsra.vi v8, v8, 1
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    ret
+; RV32-LABEL: mulhs_v8i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    lui a1, 419430
+; RV32-NEXT:    addi a1, a1, 1639
+; RV32-NEXT:    vmv.v.x v10, a1
+; RV32-NEXT:    li a1, 85
+; RV32-NEXT:    vmv.s.x v0, a1
+; RV32-NEXT:    lui a1, 629146
+; RV32-NEXT:    addi a1, a1, -1639
+; RV32-NEXT:    vmerge.vxm v10, v10, a1, v0
+; RV32-NEXT:    vmulh.vv v8, v8, v10
+; RV32-NEXT:    vsrl.vi v10, v8, 31
+; RV32-NEXT:    vsra.vi v8, v8, 1
+; RV32-NEXT:    vadd.vv v8, v8, v10
+; RV32-NEXT:    vse32.v v8, (a0)
+; RV32-NEXT:    ret
 ;
-; LMULMAX1-RV64-LABEL: mulhs_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    li a2, 3
-; LMULMAX1-RV64-NEXT:    slli a2, a2, 33
-; LMULMAX1-RV64-NEXT:    addi a2, a2, -5
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.x v10, a2
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vdiv.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vdiv.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; RV64-LABEL: mulhs_v8i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT:    vle32.v v8, (a0)
+; RV64-NEXT:    lui a1, %hi(.LCPI187_0)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI187_0)
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vlse64.v v10, (a1), zero
+; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT:    vmulh.vv v8, v8, v10
+; RV64-NEXT:    vsra.vi v8, v8, 1
+; RV64-NEXT:    vsrl.vi v10, v8, 31
+; RV64-NEXT:    vadd.vv v8, v8, v10
+; RV64-NEXT:    vse32.v v8, (a0)
+; RV64-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = sdiv <8 x i32> %a, <i32 -5, i32 5, i32 -5, i32 5, i32 -5, i32 5, i32 -5, i32 5>
   store <8 x i32> %b, ptr %x
@@ -5414,122 +3579,71 @@ define void @mulhs_v8i32(ptr %x) {
 }
 
 define void @mulhs_v4i64(ptr %x) {
-; LMULMAX2-RV32-LABEL: mulhs_v4i64:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    lui a1, 349525
-; LMULMAX2-RV32-NEXT:    addi a2, a1, 1365
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a2
-; LMULMAX2-RV32-NEXT:    li a2, 17
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a2
-; LMULMAX2-RV32-NEXT:    addi a1, a1, 1366
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v10, v10, a1, v0
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmulh.vv v10, v8, v10
-; LMULMAX2-RV32-NEXT:    lui a1, 1048560
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.x v12, a1
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vsext.vf4 v14, v12
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmadd.vv v14, v8, v10
-; LMULMAX2-RV32-NEXT:    li a1, 63
-; LMULMAX2-RV32-NEXT:    vsrl.vx v8, v14, a1
-; LMULMAX2-RV32-NEXT:    lui a1, 16
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vsext.vf4 v12, v10
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vsra.vv v10, v14, v12
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: mulhs_v4i64:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    lui a1, 349525
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
-; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
-; LMULMAX2-RV64-NEXT:    add a1, a1, a2
-; LMULMAX2-RV64-NEXT:    vmv.v.x v10, a1
-; LMULMAX2-RV64-NEXT:    lui a1, %hi(.LCPI188_0)
-; LMULMAX2-RV64-NEXT:    ld a1, %lo(.LCPI188_0)(a1)
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; LMULMAX2-RV64-NEXT:    vmv.v.i v0, 5
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vmerge.vxm v10, v10, a1, v0
-; LMULMAX2-RV64-NEXT:    vmulh.vv v10, v8, v10
-; LMULMAX2-RV64-NEXT:    lui a1, 1044496
-; LMULMAX2-RV64-NEXT:    addi a1, a1, -256
-; LMULMAX2-RV64-NEXT:    vmv.s.x v12, a1
-; LMULMAX2-RV64-NEXT:    vsext.vf8 v14, v12
-; LMULMAX2-RV64-NEXT:    vmadd.vv v14, v8, v10
-; LMULMAX2-RV64-NEXT:    li a1, 63
-; LMULMAX2-RV64-NEXT:    vsrl.vx v8, v14, a1
-; LMULMAX2-RV64-NEXT:    lui a1, 4096
-; LMULMAX2-RV64-NEXT:    addi a1, a1, 256
-; LMULMAX2-RV64-NEXT:    vmv.s.x v10, a1
-; LMULMAX2-RV64-NEXT:    vsext.vf8 v12, v10
-; LMULMAX2-RV64-NEXT:    vsra.vv v10, v14, v12
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: mulhs_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    lui a2, 1048528
-; LMULMAX1-RV32-NEXT:    addi a2, a2, 3
-; LMULMAX1-RV32-NEXT:    vmv.s.x v10, a2
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vsext.vf4 v11, v10
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vdiv.vv v9, v9, v11
-; LMULMAX1-RV32-NEXT:    vdiv.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    ret
+; RV32-LABEL: mulhs_v4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a2, a1, 1365
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a2
+; RV32-NEXT:    li a2, 17
+; RV32-NEXT:    vmv.s.x v0, a2
+; RV32-NEXT:    addi a1, a1, 1366
+; RV32-NEXT:    vmerge.vxm v10, v10, a1, v0
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vmulh.vv v10, v8, v10
+; RV32-NEXT:    lui a1, 1048560
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT:    vmv.v.x v12, a1
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    vsext.vf4 v14, v12
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vmadd.vv v14, v8, v10
+; RV32-NEXT:    li a1, 63
+; RV32-NEXT:    vsrl.vx v8, v14, a1
+; RV32-NEXT:    lui a1, 16
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    vsext.vf4 v12, v10
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vsra.vv v10, v14, v12
+; RV32-NEXT:    vadd.vv v8, v10, v8
+; RV32-NEXT:    vse64.v v8, (a0)
+; RV32-NEXT:    ret
 ;
-; LMULMAX1-RV64-LABEL: mulhs_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    lui a2, 349525
-; LMULMAX1-RV64-NEXT:    addiw a2, a2, 1365
-; LMULMAX1-RV64-NEXT:    slli a3, a2, 32
-; LMULMAX1-RV64-NEXT:    add a2, a2, a3
-; LMULMAX1-RV64-NEXT:    lui a3, %hi(.LCPI188_0)
-; LMULMAX1-RV64-NEXT:    ld a3, %lo(.LCPI188_0)(a3)
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    vmv.v.x v10, a2
-; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
-; LMULMAX1-RV64-NEXT:    vmv.s.x v10, a3
-; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vmulh.vv v11, v9, v10
-; LMULMAX1-RV64-NEXT:    vid.v v12
-; LMULMAX1-RV64-NEXT:    vrsub.vi v13, v12, 0
-; LMULMAX1-RV64-NEXT:    vmacc.vv v11, v13, v9
-; LMULMAX1-RV64-NEXT:    li a2, 63
-; LMULMAX1-RV64-NEXT:    vsrl.vx v9, v11, a2
-; LMULMAX1-RV64-NEXT:    vsra.vv v11, v11, v12
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v11, v9
-; LMULMAX1-RV64-NEXT:    vmulh.vv v10, v8, v10
-; LMULMAX1-RV64-NEXT:    vmacc.vv v10, v8, v13
-; LMULMAX1-RV64-NEXT:    vsrl.vx v8, v10, a2
-; LMULMAX1-RV64-NEXT:    vsra.vv v10, v10, v12
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; RV64-LABEL: mulhs_v4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    lui a1, 349525
+; RV64-NEXT:    addiw a1, a1, 1365
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vmv.v.x v10, a1
+; RV64-NEXT:    lui a1, %hi(.LCPI188_0)
+; RV64-NEXT:    ld a1, %lo(.LCPI188_0)(a1)
+; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
+; RV64-NEXT:    vmv.v.i v0, 5
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmerge.vxm v10, v10, a1, v0
+; RV64-NEXT:    vmulh.vv v10, v8, v10
+; RV64-NEXT:    lui a1, 1044496
+; RV64-NEXT:    addi a1, a1, -256
+; RV64-NEXT:    vmv.s.x v12, a1
+; RV64-NEXT:    vsext.vf8 v14, v12
+; RV64-NEXT:    vmadd.vv v14, v8, v10
+; RV64-NEXT:    li a1, 63
+; RV64-NEXT:    vsrl.vx v8, v14, a1
+; RV64-NEXT:    lui a1, 4096
+; RV64-NEXT:    addi a1, a1, 256
+; RV64-NEXT:    vmv.s.x v10, a1
+; RV64-NEXT:    vsext.vf8 v12, v10
+; RV64-NEXT:    vsra.vv v10, v14, v12
+; RV64-NEXT:    vadd.vv v8, v10, v8
+; RV64-NEXT:    vse64.v v8, (a0)
+; RV64-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = sdiv <4 x i64> %a, <i64 3, i64 -3, i64 3, i64 -3>
   store <4 x i64> %b, ptr %x
@@ -5537,45 +3651,15 @@ define void @mulhs_v4i64(ptr %x) {
 }
 
 define void @smin_v32i8(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: smin_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vle8.v v10, (a1)
-; LMULMAX2-NEXT:    vmin.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: smin_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vmin.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vmin.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: smin_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vmin.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vmin.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: smin_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vmin.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
   %cc = icmp slt <32 x i8> %a, %b
@@ -5585,44 +3669,14 @@ define void @smin_v32i8(ptr %x, ptr %y) {
 }
 
 define void @smin_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: smin_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vmin.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: smin_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vmin.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vmin.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: smin_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vmin.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vmin.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: smin_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vmin.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
   %cc = icmp slt <16 x i16> %a, %b
@@ -5632,44 +3686,14 @@ define void @smin_v16i16(ptr %x, ptr %y) {
 }
 
 define void @smin_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: smin_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vmin.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: smin_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vmin.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vmin.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: smin_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vmin.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vmin.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: smin_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vmin.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = load <8 x i32>, ptr %y
   %cc = icmp slt <8 x i32> %a, %b
@@ -5679,44 +3703,14 @@ define void @smin_v8i32(ptr %x, ptr %y) {
 }
 
 define void @smin_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: smin_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vmin.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: smin_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vmin.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vmin.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: smin_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vmin.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vmin.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: smin_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vle64.v v10, (a1)
+; CHECK-NEXT:    vmin.vv v8, v8, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = load <4 x i64>, ptr %y
   %cc = icmp slt <4 x i64> %a, %b
@@ -5726,45 +3720,15 @@ define void @smin_v4i64(ptr %x, ptr %y) {
 }
 
 define void @smax_v32i8(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: smax_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vle8.v v10, (a1)
-; LMULMAX2-NEXT:    vmax.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: smax_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vmax.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vmax.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: smax_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vmax.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vmax.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: smax_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vmax.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
   %cc = icmp sgt <32 x i8> %a, %b
@@ -5774,44 +3738,14 @@ define void @smax_v32i8(ptr %x, ptr %y) {
 }
 
 define void @smax_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: smax_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vmax.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: smax_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vmax.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vmax.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: smax_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vmax.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vmax.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: smax_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vmax.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
   %cc = icmp sgt <16 x i16> %a, %b
@@ -5821,44 +3755,14 @@ define void @smax_v16i16(ptr %x, ptr %y) {
 }
 
 define void @smax_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: smax_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vmax.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: smax_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vmax.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vmax.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: smax_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vmax.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vmax.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: smax_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vmax.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = load <8 x i32>, ptr %y
   %cc = icmp sgt <8 x i32> %a, %b
@@ -5868,44 +3772,14 @@ define void @smax_v8i32(ptr %x, ptr %y) {
 }
 
 define void @smax_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: smax_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vmax.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: smax_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vmax.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vmax.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: smax_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vmax.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vmax.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: smax_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vle64.v v10, (a1)
+; CHECK-NEXT:    vmax.vv v8, v8, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = load <4 x i64>, ptr %y
   %cc = icmp sgt <4 x i64> %a, %b
@@ -5915,45 +3789,15 @@ define void @smax_v4i64(ptr %x, ptr %y) {
 }
 
 define void @umin_v32i8(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: umin_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vle8.v v10, (a1)
-; LMULMAX2-NEXT:    vminu.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: umin_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vminu.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vminu.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: umin_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vminu.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vminu.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: umin_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vminu.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
   %cc = icmp ult <32 x i8> %a, %b
@@ -5963,44 +3807,14 @@ define void @umin_v32i8(ptr %x, ptr %y) {
 }
 
 define void @umin_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: umin_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vminu.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: umin_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vminu.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vminu.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: umin_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vminu.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vminu.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: umin_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vminu.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
   %cc = icmp ult <16 x i16> %a, %b
@@ -6010,44 +3824,14 @@ define void @umin_v16i16(ptr %x, ptr %y) {
 }
 
 define void @umin_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: umin_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vminu.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: umin_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vminu.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vminu.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: umin_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vminu.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vminu.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: umin_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vminu.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = load <8 x i32>, ptr %y
   %cc = icmp ult <8 x i32> %a, %b
@@ -6057,44 +3841,14 @@ define void @umin_v8i32(ptr %x, ptr %y) {
 }
 
 define void @umin_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: umin_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vminu.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: umin_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vminu.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vminu.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: umin_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vminu.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vminu.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: umin_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vle64.v v10, (a1)
+; CHECK-NEXT:    vminu.vv v8, v8, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = load <4 x i64>, ptr %y
   %cc = icmp ult <4 x i64> %a, %b
@@ -6104,45 +3858,15 @@ define void @umin_v4i64(ptr %x, ptr %y) {
 }
 
 define void @umax_v32i8(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: umax_v32i8:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-NEXT:    vle8.v v10, (a1)
-; LMULMAX2-NEXT:    vmaxu.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: umax_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vmaxu.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vmaxu.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: umax_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vmaxu.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vmaxu.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: umax_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v10, (a1)
+; CHECK-NEXT:    vmaxu.vv v8, v8, v10
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <32 x i8>, ptr %x
   %b = load <32 x i8>, ptr %y
   %cc = icmp ugt <32 x i8> %a, %b
@@ -6152,44 +3876,14 @@ define void @umax_v32i8(ptr %x, ptr %y) {
 }
 
 define void @umax_v16i16(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: umax_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-NEXT:    vle16.v v10, (a1)
-; LMULMAX2-NEXT:    vmaxu.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: umax_v16i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vmaxu.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vmaxu.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: umax_v16i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vmaxu.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vmaxu.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: umax_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v10, (a1)
+; CHECK-NEXT:    vmaxu.vv v8, v8, v10
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %x
   %b = load <16 x i16>, ptr %y
   %cc = icmp ugt <16 x i16> %a, %b
@@ -6199,44 +3893,14 @@ define void @umax_v16i16(ptr %x, ptr %y) {
 }
 
 define void @umax_v8i32(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: umax_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-NEXT:    vmaxu.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: umax_v8i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vmaxu.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vmaxu.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: umax_v8i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vmaxu.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vmaxu.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: umax_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vle32.v v10, (a1)
+; CHECK-NEXT:    vmaxu.vv v8, v8, v10
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = load <8 x i32>, ptr %y
   %cc = icmp ugt <8 x i32> %a, %b
@@ -6246,44 +3910,14 @@ define void @umax_v8i32(ptr %x, ptr %y) {
 }
 
 define void @umax_v4i64(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: umax_v4i64:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-NEXT:    vle64.v v10, (a1)
-; LMULMAX2-NEXT:    vmaxu.vv v8, v8, v10
-; LMULMAX2-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: umax_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
-; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV32-NEXT:    vmaxu.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vmaxu.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: umax_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
-; LMULMAX1-RV64-NEXT:    vmaxu.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vmaxu.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: umax_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vle64.v v10, (a1)
+; CHECK-NEXT:    vmaxu.vv v8, v8, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %x
   %b = load <4 x i64>, ptr %y
   %cc = icmp ugt <4 x i64> %a, %b
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
index f86286a14b2aea..dd0fc5a11a0ed6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -1,12 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX1
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX1
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX2
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX4
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX4
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX8
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX8
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 ; Test with ELEN limited
 ; RUN: llc -mtriple=riscv32 -mattr=+f,+zve32f,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVE32F
 ; RUN: llc -mtriple=riscv64 -mattr=+f,+zve32f,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVE32F
@@ -462,73 +456,13 @@ define <16 x i1> @buildvec_mask_v16i1_undefs() {
 }
 
 define <32 x i1> @buildvec_mask_v32i1() {
-; RV32-LMULMAX1-LABEL: buildvec_mask_v32i1:
-; RV32-LMULMAX1:       # %bb.0:
-; RV32-LMULMAX1-NEXT:    li a0, 1776
-; RV32-LMULMAX1-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; RV32-LMULMAX1-NEXT:    vmv.s.x v0, a0
-; RV32-LMULMAX1-NEXT:    lui a0, 11
-; RV32-LMULMAX1-NEXT:    addi a0, a0, 1718
-; RV32-LMULMAX1-NEXT:    vmv.s.x v8, a0
-; RV32-LMULMAX1-NEXT:    ret
-;
-; RV64-LMULMAX1-LABEL: buildvec_mask_v32i1:
-; RV64-LMULMAX1:       # %bb.0:
-; RV64-LMULMAX1-NEXT:    li a0, 1776
-; RV64-LMULMAX1-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; RV64-LMULMAX1-NEXT:    vmv.s.x v0, a0
-; RV64-LMULMAX1-NEXT:    lui a0, 11
-; RV64-LMULMAX1-NEXT:    addi a0, a0, 1718
-; RV64-LMULMAX1-NEXT:    vmv.s.x v8, a0
-; RV64-LMULMAX1-NEXT:    ret
-;
-; RV32-LMULMAX2-LABEL: buildvec_mask_v32i1:
-; RV32-LMULMAX2:       # %bb.0:
-; RV32-LMULMAX2-NEXT:    lui a0, 748384
-; RV32-LMULMAX2-NEXT:    addi a0, a0, 1776
-; RV32-LMULMAX2-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-LMULMAX2-NEXT:    vmv.s.x v0, a0
-; RV32-LMULMAX2-NEXT:    ret
-;
-; RV64-LMULMAX2-LABEL: buildvec_mask_v32i1:
-; RV64-LMULMAX2:       # %bb.0:
-; RV64-LMULMAX2-NEXT:    lui a0, 748384
-; RV64-LMULMAX2-NEXT:    addi a0, a0, 1776
-; RV64-LMULMAX2-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-LMULMAX2-NEXT:    vmv.s.x v0, a0
-; RV64-LMULMAX2-NEXT:    ret
-;
-; RV32-LMULMAX4-LABEL: buildvec_mask_v32i1:
-; RV32-LMULMAX4:       # %bb.0:
-; RV32-LMULMAX4-NEXT:    lui a0, 748384
-; RV32-LMULMAX4-NEXT:    addi a0, a0, 1776
-; RV32-LMULMAX4-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-LMULMAX4-NEXT:    vmv.s.x v0, a0
-; RV32-LMULMAX4-NEXT:    ret
-;
-; RV64-LMULMAX4-LABEL: buildvec_mask_v32i1:
-; RV64-LMULMAX4:       # %bb.0:
-; RV64-LMULMAX4-NEXT:    lui a0, 748384
-; RV64-LMULMAX4-NEXT:    addi a0, a0, 1776
-; RV64-LMULMAX4-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-LMULMAX4-NEXT:    vmv.s.x v0, a0
-; RV64-LMULMAX4-NEXT:    ret
-;
-; RV32-LMULMAX8-LABEL: buildvec_mask_v32i1:
-; RV32-LMULMAX8:       # %bb.0:
-; RV32-LMULMAX8-NEXT:    lui a0, 748384
-; RV32-LMULMAX8-NEXT:    addi a0, a0, 1776
-; RV32-LMULMAX8-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-LMULMAX8-NEXT:    vmv.s.x v0, a0
-; RV32-LMULMAX8-NEXT:    ret
-;
-; RV64-LMULMAX8-LABEL: buildvec_mask_v32i1:
-; RV64-LMULMAX8:       # %bb.0:
-; RV64-LMULMAX8-NEXT:    lui a0, 748384
-; RV64-LMULMAX8-NEXT:    addi a0, a0, 1776
-; RV64-LMULMAX8-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-LMULMAX8-NEXT:    vmv.s.x v0, a0
-; RV64-LMULMAX8-NEXT:    ret
+; CHECK-LABEL: buildvec_mask_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 748384
+; CHECK-NEXT:    addi a0, a0, 1776
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    ret
 ;
 ; ZVE32F-LABEL: buildvec_mask_v32i1:
 ; ZVE32F:       # %bb.0:
@@ -541,95 +475,25 @@ define <32 x i1> @buildvec_mask_v32i1() {
 }
 
 define <64 x i1> @buildvec_mask_v64i1() {
-; RV32-LMULMAX1-LABEL: buildvec_mask_v64i1:
-; RV32-LMULMAX1:       # %bb.0:
-; RV32-LMULMAX1-NEXT:    li a0, 1776
-; RV32-LMULMAX1-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; RV32-LMULMAX1-NEXT:    vmv.s.x v0, a0
-; RV32-LMULMAX1-NEXT:    lui a0, 4
-; RV32-LMULMAX1-NEXT:    addi a0, a0, -1793
-; RV32-LMULMAX1-NEXT:    vmv.s.x v9, a0
-; RV32-LMULMAX1-NEXT:    lui a0, 11
-; RV32-LMULMAX1-NEXT:    addi a0, a0, 1718
-; RV32-LMULMAX1-NEXT:    vmv.s.x v8, a0
-; RV32-LMULMAX1-NEXT:    vmv.v.v v10, v8
-; RV32-LMULMAX1-NEXT:    ret
-;
-; RV64-LMULMAX1-LABEL: buildvec_mask_v64i1:
-; RV64-LMULMAX1:       # %bb.0:
-; RV64-LMULMAX1-NEXT:    li a0, 1776
-; RV64-LMULMAX1-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; RV64-LMULMAX1-NEXT:    vmv.s.x v0, a0
-; RV64-LMULMAX1-NEXT:    lui a0, 4
-; RV64-LMULMAX1-NEXT:    addi a0, a0, -1793
-; RV64-LMULMAX1-NEXT:    vmv.s.x v9, a0
-; RV64-LMULMAX1-NEXT:    lui a0, 11
-; RV64-LMULMAX1-NEXT:    addi a0, a0, 1718
-; RV64-LMULMAX1-NEXT:    vmv.s.x v8, a0
-; RV64-LMULMAX1-NEXT:    vmv.v.v v10, v8
-; RV64-LMULMAX1-NEXT:    ret
-;
-; RV32-LMULMAX2-LABEL: buildvec_mask_v64i1:
-; RV32-LMULMAX2:       # %bb.0:
-; RV32-LMULMAX2-NEXT:    lui a0, 748384
-; RV32-LMULMAX2-NEXT:    addi a0, a0, 1776
-; RV32-LMULMAX2-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-LMULMAX2-NEXT:    vmv.s.x v0, a0
-; RV32-LMULMAX2-NEXT:    lui a0, 748388
-; RV32-LMULMAX2-NEXT:    addi a0, a0, -1793
-; RV32-LMULMAX2-NEXT:    vmv.s.x v8, a0
-; RV32-LMULMAX2-NEXT:    ret
-;
-; RV64-LMULMAX2-LABEL: buildvec_mask_v64i1:
-; RV64-LMULMAX2:       # %bb.0:
-; RV64-LMULMAX2-NEXT:    lui a0, 748384
-; RV64-LMULMAX2-NEXT:    addi a0, a0, 1776
-; RV64-LMULMAX2-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-LMULMAX2-NEXT:    vmv.s.x v0, a0
-; RV64-LMULMAX2-NEXT:    lui a0, 748388
-; RV64-LMULMAX2-NEXT:    addi a0, a0, -1793
-; RV64-LMULMAX2-NEXT:    vmv.s.x v8, a0
-; RV64-LMULMAX2-NEXT:    ret
-;
-; RV32-LMULMAX4-LABEL: buildvec_mask_v64i1:
-; RV32-LMULMAX4:       # %bb.0:
-; RV32-LMULMAX4-NEXT:    lui a0, 748388
-; RV32-LMULMAX4-NEXT:    addi a0, a0, -1793
-; RV32-LMULMAX4-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-LMULMAX4-NEXT:    vmv.v.x v0, a0
-; RV32-LMULMAX4-NEXT:    lui a0, 748384
-; RV32-LMULMAX4-NEXT:    addi a0, a0, 1776
-; RV32-LMULMAX4-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
-; RV32-LMULMAX4-NEXT:    vmv.s.x v0, a0
-; RV32-LMULMAX4-NEXT:    ret
-;
-; RV64-LMULMAX4-LABEL: buildvec_mask_v64i1:
-; RV64-LMULMAX4:       # %bb.0:
-; RV64-LMULMAX4-NEXT:    lui a0, %hi(.LCPI19_0)
-; RV64-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI19_0)
-; RV64-LMULMAX4-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-LMULMAX4-NEXT:    vle64.v v0, (a0)
-; RV64-LMULMAX4-NEXT:    ret
-;
-; RV32-LMULMAX8-LABEL: buildvec_mask_v64i1:
-; RV32-LMULMAX8:       # %bb.0:
-; RV32-LMULMAX8-NEXT:    lui a0, 748388
-; RV32-LMULMAX8-NEXT:    addi a0, a0, -1793
-; RV32-LMULMAX8-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-LMULMAX8-NEXT:    vmv.v.x v0, a0
-; RV32-LMULMAX8-NEXT:    lui a0, 748384
-; RV32-LMULMAX8-NEXT:    addi a0, a0, 1776
-; RV32-LMULMAX8-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
-; RV32-LMULMAX8-NEXT:    vmv.s.x v0, a0
-; RV32-LMULMAX8-NEXT:    ret
-;
-; RV64-LMULMAX8-LABEL: buildvec_mask_v64i1:
-; RV64-LMULMAX8:       # %bb.0:
-; RV64-LMULMAX8-NEXT:    lui a0, %hi(.LCPI19_0)
-; RV64-LMULMAX8-NEXT:    addi a0, a0, %lo(.LCPI19_0)
-; RV64-LMULMAX8-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-LMULMAX8-NEXT:    vle64.v v0, (a0)
-; RV64-LMULMAX8-NEXT:    ret
+; RV32-LABEL: buildvec_mask_v64i1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, 748388
+; RV32-NEXT:    addi a0, a0, -1793
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT:    vmv.v.x v0, a0
+; RV32-NEXT:    lui a0, 748384
+; RV32-NEXT:    addi a0, a0, 1776
+; RV32-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
+; RV32-NEXT:    vmv.s.x v0, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: buildvec_mask_v64i1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a0, %hi(.LCPI19_0)
+; RV64-NEXT:    addi a0, a0, %lo(.LCPI19_0)
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vle64.v v0, (a0)
+; RV64-NEXT:    ret
 ;
 ; ZVE32F-LABEL: buildvec_mask_v64i1:
 ; ZVE32F:       # %bb.0:
@@ -646,134 +510,25 @@ define <64 x i1> @buildvec_mask_v64i1() {
 }
 
 define <128 x i1> @buildvec_mask_v128i1() {
-; RV32-LMULMAX1-LABEL: buildvec_mask_v128i1:
-; RV32-LMULMAX1:       # %bb.0:
-; RV32-LMULMAX1-NEXT:    li a0, 1776
-; RV32-LMULMAX1-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; RV32-LMULMAX1-NEXT:    vmv.s.x v0, a0
-; RV32-LMULMAX1-NEXT:    lui a0, 11
-; RV32-LMULMAX1-NEXT:    addi a0, a0, 1718
-; RV32-LMULMAX1-NEXT:    vmv.s.x v8, a0
-; RV32-LMULMAX1-NEXT:    lui a0, 8
-; RV32-LMULMAX1-NEXT:    addi a0, a0, 1718
-; RV32-LMULMAX1-NEXT:    vmv.s.x v12, a0
-; RV32-LMULMAX1-NEXT:    lui a0, 4
-; RV32-LMULMAX1-NEXT:    addi a0, a0, -1793
-; RV32-LMULMAX1-NEXT:    vmv.s.x v9, a0
-; RV32-LMULMAX1-NEXT:    lui a0, 14
-; RV32-LMULMAX1-NEXT:    addi a0, a0, 1722
-; RV32-LMULMAX1-NEXT:    vmv.s.x v14, a0
-; RV32-LMULMAX1-NEXT:    vmv.v.v v10, v8
-; RV32-LMULMAX1-NEXT:    vmv.v.v v11, v0
-; RV32-LMULMAX1-NEXT:    vmv.v.v v13, v9
-; RV32-LMULMAX1-NEXT:    ret
-;
-; RV64-LMULMAX1-LABEL: buildvec_mask_v128i1:
-; RV64-LMULMAX1:       # %bb.0:
-; RV64-LMULMAX1-NEXT:    li a0, 1776
-; RV64-LMULMAX1-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; RV64-LMULMAX1-NEXT:    vmv.s.x v0, a0
-; RV64-LMULMAX1-NEXT:    lui a0, 11
-; RV64-LMULMAX1-NEXT:    addi a0, a0, 1718
-; RV64-LMULMAX1-NEXT:    vmv.s.x v8, a0
-; RV64-LMULMAX1-NEXT:    lui a0, 8
-; RV64-LMULMAX1-NEXT:    addi a0, a0, 1718
-; RV64-LMULMAX1-NEXT:    vmv.s.x v12, a0
-; RV64-LMULMAX1-NEXT:    lui a0, 4
-; RV64-LMULMAX1-NEXT:    addi a0, a0, -1793
-; RV64-LMULMAX1-NEXT:    vmv.s.x v9, a0
-; RV64-LMULMAX1-NEXT:    lui a0, 14
-; RV64-LMULMAX1-NEXT:    addi a0, a0, 1722
-; RV64-LMULMAX1-NEXT:    vmv.s.x v14, a0
-; RV64-LMULMAX1-NEXT:    vmv.v.v v10, v8
-; RV64-LMULMAX1-NEXT:    vmv.v.v v11, v0
-; RV64-LMULMAX1-NEXT:    vmv.v.v v13, v9
-; RV64-LMULMAX1-NEXT:    ret
-;
-; RV32-LMULMAX2-LABEL: buildvec_mask_v128i1:
-; RV32-LMULMAX2:       # %bb.0:
-; RV32-LMULMAX2-NEXT:    lui a0, 748384
-; RV32-LMULMAX2-NEXT:    addi a0, a0, 1776
-; RV32-LMULMAX2-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-LMULMAX2-NEXT:    vmv.s.x v0, a0
-; RV32-LMULMAX2-NEXT:    lui a0, 748388
-; RV32-LMULMAX2-NEXT:    addi a0, a0, -1793
-; RV32-LMULMAX2-NEXT:    vmv.s.x v8, a0
-; RV32-LMULMAX2-NEXT:    lui a0, 551776
-; RV32-LMULMAX2-NEXT:    addi a0, a0, 1776
-; RV32-LMULMAX2-NEXT:    vmv.s.x v9, a0
-; RV32-LMULMAX2-NEXT:    lui a0, 945060
-; RV32-LMULMAX2-NEXT:    addi a0, a0, -1793
-; RV32-LMULMAX2-NEXT:    vmv.s.x v10, a0
-; RV32-LMULMAX2-NEXT:    ret
-;
-; RV64-LMULMAX2-LABEL: buildvec_mask_v128i1:
-; RV64-LMULMAX2:       # %bb.0:
-; RV64-LMULMAX2-NEXT:    lui a0, 748384
-; RV64-LMULMAX2-NEXT:    addi a0, a0, 1776
-; RV64-LMULMAX2-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-LMULMAX2-NEXT:    vmv.s.x v0, a0
-; RV64-LMULMAX2-NEXT:    lui a0, 748388
-; RV64-LMULMAX2-NEXT:    addi a0, a0, -1793
-; RV64-LMULMAX2-NEXT:    vmv.s.x v8, a0
-; RV64-LMULMAX2-NEXT:    lui a0, 551776
-; RV64-LMULMAX2-NEXT:    addi a0, a0, 1776
-; RV64-LMULMAX2-NEXT:    vmv.s.x v9, a0
-; RV64-LMULMAX2-NEXT:    lui a0, 945060
-; RV64-LMULMAX2-NEXT:    addi a0, a0, -1793
-; RV64-LMULMAX2-NEXT:    vmv.s.x v10, a0
-; RV64-LMULMAX2-NEXT:    ret
-;
-; RV32-LMULMAX4-LABEL: buildvec_mask_v128i1:
-; RV32-LMULMAX4:       # %bb.0:
-; RV32-LMULMAX4-NEXT:    lui a0, 748388
-; RV32-LMULMAX4-NEXT:    addi a0, a0, -1793
-; RV32-LMULMAX4-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-LMULMAX4-NEXT:    vmv.v.x v0, a0
-; RV32-LMULMAX4-NEXT:    lui a0, 748384
-; RV32-LMULMAX4-NEXT:    addi a0, a0, 1776
-; RV32-LMULMAX4-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
-; RV32-LMULMAX4-NEXT:    vmv.s.x v0, a0
-; RV32-LMULMAX4-NEXT:    lui a0, 945060
-; RV32-LMULMAX4-NEXT:    addi a0, a0, -1793
-; RV32-LMULMAX4-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-LMULMAX4-NEXT:    vmv.v.x v8, a0
-; RV32-LMULMAX4-NEXT:    lui a0, 551776
-; RV32-LMULMAX4-NEXT:    addi a0, a0, 1776
-; RV32-LMULMAX4-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
-; RV32-LMULMAX4-NEXT:    vmv.s.x v8, a0
-; RV32-LMULMAX4-NEXT:    ret
-;
-; RV64-LMULMAX4-LABEL: buildvec_mask_v128i1:
-; RV64-LMULMAX4:       # %bb.0:
-; RV64-LMULMAX4-NEXT:    lui a0, %hi(.LCPI20_0)
-; RV64-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI20_0)
-; RV64-LMULMAX4-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-LMULMAX4-NEXT:    vle64.v v0, (a0)
-; RV64-LMULMAX4-NEXT:    lui a0, %hi(.LCPI20_1)
-; RV64-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI20_1)
-; RV64-LMULMAX4-NEXT:    vle64.v v8, (a0)
-; RV64-LMULMAX4-NEXT:    ret
-;
-; RV32-LMULMAX8-LABEL: buildvec_mask_v128i1:
-; RV32-LMULMAX8:       # %bb.0:
-; RV32-LMULMAX8-NEXT:    lui a0, %hi(.LCPI20_0)
-; RV32-LMULMAX8-NEXT:    addi a0, a0, %lo(.LCPI20_0)
-; RV32-LMULMAX8-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-LMULMAX8-NEXT:    vle32.v v0, (a0)
-; RV32-LMULMAX8-NEXT:    ret
-;
-; RV64-LMULMAX8-LABEL: buildvec_mask_v128i1:
-; RV64-LMULMAX8:       # %bb.0:
-; RV64-LMULMAX8-NEXT:    lui a0, %hi(.LCPI20_0)
-; RV64-LMULMAX8-NEXT:    addi a0, a0, %lo(.LCPI20_0)
-; RV64-LMULMAX8-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64-LMULMAX8-NEXT:    vlse64.v v0, (a0), zero
-; RV64-LMULMAX8-NEXT:    lui a0, %hi(.LCPI20_1)
-; RV64-LMULMAX8-NEXT:    ld a0, %lo(.LCPI20_1)(a0)
-; RV64-LMULMAX8-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
-; RV64-LMULMAX8-NEXT:    vmv.s.x v0, a0
-; RV64-LMULMAX8-NEXT:    ret
+; RV32-LABEL: buildvec_mask_v128i1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI20_0)
+; RV32-NEXT:    addi a0, a0, %lo(.LCPI20_0)
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vle32.v v0, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: buildvec_mask_v128i1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a0, %hi(.LCPI20_0)
+; RV64-NEXT:    addi a0, a0, %lo(.LCPI20_0)
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vlse64.v v0, (a0), zero
+; RV64-NEXT:    lui a0, %hi(.LCPI20_1)
+; RV64-NEXT:    ld a0, %lo(.LCPI20_1)(a0)
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v0, a0
+; RV64-NEXT:    ret
 ;
 ; ZVE32F-LABEL: buildvec_mask_v128i1:
 ; ZVE32F:       # %bb.0:
@@ -786,124 +541,14 @@ define <128 x i1> @buildvec_mask_v128i1() {
 }
 
 define <128 x i1> @buildvec_mask_optsize_v128i1() optsize {
-; RV32-LMULMAX1-LABEL: buildvec_mask_optsize_v128i1:
-; RV32-LMULMAX1:       # %bb.0:
-; RV32-LMULMAX1-NEXT:    li a0, 1776
-; RV32-LMULMAX1-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; RV32-LMULMAX1-NEXT:    vmv.s.x v0, a0
-; RV32-LMULMAX1-NEXT:    lui a0, 11
-; RV32-LMULMAX1-NEXT:    addi a0, a0, 1718
-; RV32-LMULMAX1-NEXT:    vmv.s.x v8, a0
-; RV32-LMULMAX1-NEXT:    lui a0, 8
-; RV32-LMULMAX1-NEXT:    addi a0, a0, 1718
-; RV32-LMULMAX1-NEXT:    vmv.s.x v12, a0
-; RV32-LMULMAX1-NEXT:    lui a0, 4
-; RV32-LMULMAX1-NEXT:    addi a0, a0, -1793
-; RV32-LMULMAX1-NEXT:    vmv.s.x v9, a0
-; RV32-LMULMAX1-NEXT:    lui a0, 14
-; RV32-LMULMAX1-NEXT:    addi a0, a0, 1722
-; RV32-LMULMAX1-NEXT:    vmv.s.x v14, a0
-; RV32-LMULMAX1-NEXT:    vmv.v.v v10, v8
-; RV32-LMULMAX1-NEXT:    vmv.v.v v11, v0
-; RV32-LMULMAX1-NEXT:    vmv.v.v v13, v9
-; RV32-LMULMAX1-NEXT:    ret
-;
-; RV64-LMULMAX1-LABEL: buildvec_mask_optsize_v128i1:
-; RV64-LMULMAX1:       # %bb.0:
-; RV64-LMULMAX1-NEXT:    li a0, 1776
-; RV64-LMULMAX1-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; RV64-LMULMAX1-NEXT:    vmv.s.x v0, a0
-; RV64-LMULMAX1-NEXT:    lui a0, 11
-; RV64-LMULMAX1-NEXT:    addi a0, a0, 1718
-; RV64-LMULMAX1-NEXT:    vmv.s.x v8, a0
-; RV64-LMULMAX1-NEXT:    lui a0, 8
-; RV64-LMULMAX1-NEXT:    addi a0, a0, 1718
-; RV64-LMULMAX1-NEXT:    vmv.s.x v12, a0
-; RV64-LMULMAX1-NEXT:    lui a0, 4
-; RV64-LMULMAX1-NEXT:    addi a0, a0, -1793
-; RV64-LMULMAX1-NEXT:    vmv.s.x v9, a0
-; RV64-LMULMAX1-NEXT:    lui a0, 14
-; RV64-LMULMAX1-NEXT:    addi a0, a0, 1722
-; RV64-LMULMAX1-NEXT:    vmv.s.x v14, a0
-; RV64-LMULMAX1-NEXT:    vmv.v.v v10, v8
-; RV64-LMULMAX1-NEXT:    vmv.v.v v11, v0
-; RV64-LMULMAX1-NEXT:    vmv.v.v v13, v9
-; RV64-LMULMAX1-NEXT:    ret
-;
-; RV32-LMULMAX2-LABEL: buildvec_mask_optsize_v128i1:
-; RV32-LMULMAX2:       # %bb.0:
-; RV32-LMULMAX2-NEXT:    lui a0, 748384
-; RV32-LMULMAX2-NEXT:    addi a0, a0, 1776
-; RV32-LMULMAX2-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-LMULMAX2-NEXT:    vmv.s.x v0, a0
-; RV32-LMULMAX2-NEXT:    lui a0, 748388
-; RV32-LMULMAX2-NEXT:    addi a0, a0, -1793
-; RV32-LMULMAX2-NEXT:    vmv.s.x v8, a0
-; RV32-LMULMAX2-NEXT:    lui a0, 551776
-; RV32-LMULMAX2-NEXT:    addi a0, a0, 1776
-; RV32-LMULMAX2-NEXT:    vmv.s.x v9, a0
-; RV32-LMULMAX2-NEXT:    lui a0, 945060
-; RV32-LMULMAX2-NEXT:    addi a0, a0, -1793
-; RV32-LMULMAX2-NEXT:    vmv.s.x v10, a0
-; RV32-LMULMAX2-NEXT:    ret
-;
-; RV64-LMULMAX2-LABEL: buildvec_mask_optsize_v128i1:
-; RV64-LMULMAX2:       # %bb.0:
-; RV64-LMULMAX2-NEXT:    lui a0, 748384
-; RV64-LMULMAX2-NEXT:    addi a0, a0, 1776
-; RV64-LMULMAX2-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-LMULMAX2-NEXT:    vmv.s.x v0, a0
-; RV64-LMULMAX2-NEXT:    lui a0, 748388
-; RV64-LMULMAX2-NEXT:    addi a0, a0, -1793
-; RV64-LMULMAX2-NEXT:    vmv.s.x v8, a0
-; RV64-LMULMAX2-NEXT:    lui a0, 551776
-; RV64-LMULMAX2-NEXT:    addi a0, a0, 1776
-; RV64-LMULMAX2-NEXT:    vmv.s.x v9, a0
-; RV64-LMULMAX2-NEXT:    lui a0, 945060
-; RV64-LMULMAX2-NEXT:    addi a0, a0, -1793
-; RV64-LMULMAX2-NEXT:    vmv.s.x v10, a0
-; RV64-LMULMAX2-NEXT:    ret
-;
-; RV32-LMULMAX4-LABEL: buildvec_mask_optsize_v128i1:
-; RV32-LMULMAX4:       # %bb.0:
-; RV32-LMULMAX4-NEXT:    lui a0, %hi(.LCPI21_0)
-; RV32-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI21_0)
-; RV32-LMULMAX4-NEXT:    li a1, 64
-; RV32-LMULMAX4-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; RV32-LMULMAX4-NEXT:    vlm.v v0, (a0)
-; RV32-LMULMAX4-NEXT:    lui a0, %hi(.LCPI21_1)
-; RV32-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI21_1)
-; RV32-LMULMAX4-NEXT:    vlm.v v8, (a0)
-; RV32-LMULMAX4-NEXT:    ret
-;
-; RV64-LMULMAX4-LABEL: buildvec_mask_optsize_v128i1:
-; RV64-LMULMAX4:       # %bb.0:
-; RV64-LMULMAX4-NEXT:    lui a0, %hi(.LCPI21_0)
-; RV64-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI21_0)
-; RV64-LMULMAX4-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-LMULMAX4-NEXT:    vle64.v v0, (a0)
-; RV64-LMULMAX4-NEXT:    lui a0, %hi(.LCPI21_1)
-; RV64-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI21_1)
-; RV64-LMULMAX4-NEXT:    vle64.v v8, (a0)
-; RV64-LMULMAX4-NEXT:    ret
-;
-; RV32-LMULMAX8-LABEL: buildvec_mask_optsize_v128i1:
-; RV32-LMULMAX8:       # %bb.0:
-; RV32-LMULMAX8-NEXT:    lui a0, %hi(.LCPI21_0)
-; RV32-LMULMAX8-NEXT:    addi a0, a0, %lo(.LCPI21_0)
-; RV32-LMULMAX8-NEXT:    li a1, 128
-; RV32-LMULMAX8-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; RV32-LMULMAX8-NEXT:    vlm.v v0, (a0)
-; RV32-LMULMAX8-NEXT:    ret
-;
-; RV64-LMULMAX8-LABEL: buildvec_mask_optsize_v128i1:
-; RV64-LMULMAX8:       # %bb.0:
-; RV64-LMULMAX8-NEXT:    lui a0, %hi(.LCPI21_0)
-; RV64-LMULMAX8-NEXT:    addi a0, a0, %lo(.LCPI21_0)
-; RV64-LMULMAX8-NEXT:    li a1, 128
-; RV64-LMULMAX8-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; RV64-LMULMAX8-NEXT:    vlm.v v0, (a0)
-; RV64-LMULMAX8-NEXT:    ret
+; CHECK-LABEL: buildvec_mask_optsize_v128i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI21_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI21_0)
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v0, (a0)
+; CHECK-NEXT:    ret
 ;
 ; ZVE32F-LABEL: buildvec_mask_optsize_v128i1:
 ; ZVE32F:       # %bb.0:
@@ -915,6 +560,3 @@ define <128 x i1> @buildvec_mask_optsize_v128i1() optsize {
 ; ZVE32F-NEXT:    ret
   ret <128 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1>
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-RV32: {{.*}}
-; CHECK-RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll
index 0d0d21d9b45ea9..b73408d023207b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define void @load_store_v1i1(ptr %x, ptr %y) {
 ; CHECK-LABEL: load_store_v1i1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
index cb501c10d03908..4f7b885d998e5b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define void @splat_ones_v1i1(ptr %x) {
 ; CHECK-LABEL: splat_ones_v1i1:
@@ -163,37 +161,15 @@ define void @splat_zeros_v32i1(ptr %x) {
 }
 
 define void @splat_v32i1(ptr %x, i1 %y) {
-; LMULMAX2-LABEL: splat_v32i1:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    andi a1, a1, 1
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.x v8, a1
-; LMULMAX2-NEXT:    vmsne.vi v10, v8, 0
-; LMULMAX2-NEXT:    vsm.v v10, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: splat_v32i1:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    andi a1, a1, 1
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.x v8, a1
-; LMULMAX1-RV32-NEXT:    vmsne.vi v8, v8, 0
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 2
-; LMULMAX1-RV32-NEXT:    vsm.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vsm.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: splat_v32i1:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    andi a1, a1, 1
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.x v8, a1
-; LMULMAX1-RV64-NEXT:    vmsne.vi v8, v8, 0
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 2
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: splat_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a1, a1, 1
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vmv.v.x v8, a1
+; CHECK-NEXT:    vmsne.vi v10, v8, 0
+; CHECK-NEXT:    vsm.v v10, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <32 x i1> poison, i1 %y, i32 0
   %b = shufflevector <32 x i1> %a, <32 x i1> poison, <32 x i32> zeroinitializer
   store <32 x i1> %b, ptr %x
@@ -201,70 +177,33 @@ define void @splat_v32i1(ptr %x, i1 %y) {
 }
 
 define void @splat_ones_v64i1(ptr %x) {
-; LMULMAX1-RV32-LABEL: splat_ones_v64i1:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmset.m v8
-; LMULMAX1-RV32-NEXT:    vsm.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 6
-; LMULMAX1-RV32-NEXT:    vsm.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 4
-; LMULMAX1-RV32-NEXT:    vsm.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    addi a0, a0, 2
-; LMULMAX1-RV32-NEXT:    vsm.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
+; RV32-LABEL: splat_ones_v64i1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; RV32-NEXT:    vmset.m v8
+; RV32-NEXT:    vsm.v v8, (a0)
+; RV32-NEXT:    ret
 ;
-; LMULMAX1-RV64-LABEL: splat_ones_v64i1:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    li a1, -1
-; LMULMAX1-RV64-NEXT:    sd a1, 0(a0)
-; LMULMAX1-RV64-NEXT:    ret
+; RV64-LABEL: splat_ones_v64i1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    li a1, -1
+; RV64-NEXT:    sd a1, 0(a0)
+; RV64-NEXT:    ret
   store <64 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, ptr %x
   ret void
 }
 
 define void @splat_v64i1(ptr %x, i1 %y) {
-; LMULMAX2-LABEL: splat_v64i1:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    andi a1, a1, 1
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vmv.v.x v8, a1
-; LMULMAX2-NEXT:    vmsne.vi v10, v8, 0
-; LMULMAX2-NEXT:    addi a1, a0, 4
-; LMULMAX2-NEXT:    vsm.v v10, (a1)
-; LMULMAX2-NEXT:    vsm.v v10, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: splat_v64i1:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    andi a1, a1, 1
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.x v8, a1
-; LMULMAX1-RV32-NEXT:    vmsne.vi v8, v8, 0
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 6
-; LMULMAX1-RV32-NEXT:    vsm.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 4
-; LMULMAX1-RV32-NEXT:    vsm.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 2
-; LMULMAX1-RV32-NEXT:    vsm.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vsm.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: splat_v64i1:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    andi a1, a1, 1
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vmv.v.x v8, a1
-; LMULMAX1-RV64-NEXT:    vmsne.vi v8, v8, 0
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 6
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 4
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 2
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: splat_v64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a1, a1, 1
+; CHECK-NEXT:    li a2, 64
+; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-NEXT:    vmv.v.x v8, a1
+; CHECK-NEXT:    vmsne.vi v12, v8, 0
+; CHECK-NEXT:    vsm.v v12, (a0)
+; CHECK-NEXT:    ret
   %a = insertelement <64 x i1> poison, i1 %y, i32 0
   %b = shufflevector <64 x i1> %a, <64 x i1> poison, <64 x i32> zeroinitializer
   store <64 x i1> %b, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
index 5574d12d2d5dd8..0161ac4bc338db 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,RV32LMULMAX1
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,RV64LMULMAX1
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,RV32LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,RV64LMULMAX2
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <2 x i8> @llvm.experimental.stepvector.v2i8()
 
@@ -103,18 +101,11 @@ define <8 x i16> @stepvector_v8i16() {
 declare <16 x i16> @llvm.experimental.stepvector.v16i16()
 
 define <16 x i16> @stepvector_v16i16() {
-; LMULMAX1-LABEL: stepvector_v16i16:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-NEXT:    vid.v v8
-; LMULMAX1-NEXT:    vadd.vi v9, v8, 8
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX2-LABEL: stepvector_v16i16:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; LMULMAX2-NEXT:    vid.v v8
-; LMULMAX2-NEXT:    ret
+; CHECK-LABEL: stepvector_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    ret
   %v = call <16 x i16> @llvm.experimental.stepvector.v16i16()
   ret <16 x i16> %v
 }
@@ -146,18 +137,11 @@ define <4 x i32> @stepvector_v4i32() {
 declare <8 x i32> @llvm.experimental.stepvector.v8i32()
 
 define <8 x i32> @stepvector_v8i32() {
-; LMULMAX1-LABEL: stepvector_v8i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vid.v v8
-; LMULMAX1-NEXT:    vadd.vi v9, v8, 4
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX2-LABEL: stepvector_v8i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vid.v v8
-; LMULMAX2-NEXT:    ret
+; CHECK-LABEL: stepvector_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    ret
   %v = call <8 x i32> @llvm.experimental.stepvector.v8i32()
   ret <8 x i32> %v
 }
@@ -165,21 +149,11 @@ define <8 x i32> @stepvector_v8i32() {
 declare <16 x i32> @llvm.experimental.stepvector.v16i32()
 
 define <16 x i32> @stepvector_v16i32() {
-; LMULMAX1-LABEL: stepvector_v16i32:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vid.v v8
-; LMULMAX1-NEXT:    vadd.vi v9, v8, 4
-; LMULMAX1-NEXT:    vadd.vi v10, v8, 8
-; LMULMAX1-NEXT:    vadd.vi v11, v8, 12
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX2-LABEL: stepvector_v16i32:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT:    vid.v v8
-; LMULMAX2-NEXT:    vadd.vi v10, v8, 8
-; LMULMAX2-NEXT:    ret
+; CHECK-LABEL: stepvector_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    ret
   %v = call <16 x i32> @llvm.experimental.stepvector.v16i32()
   ret <16 x i32> %v
 }
@@ -187,33 +161,19 @@ define <16 x i32> @stepvector_v16i32() {
 declare <2 x i64> @llvm.experimental.stepvector.v2i64()
 
 define <2 x i64> @stepvector_v2i64() {
-; RV32LMULMAX1-LABEL: stepvector_v2i64:
-; RV32LMULMAX1:       # %bb.0:
-; RV32LMULMAX1-NEXT:    lui a0, 16
-; RV32LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32LMULMAX1-NEXT:    vmv.s.x v9, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v8, v9
-; RV32LMULMAX1-NEXT:    ret
-;
-; RV64LMULMAX1-LABEL: stepvector_v2i64:
-; RV64LMULMAX1:       # %bb.0:
-; RV64LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64LMULMAX1-NEXT:    vid.v v8
-; RV64LMULMAX1-NEXT:    ret
+; RV32-LABEL: stepvector_v2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, 16
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v9, a0
+; RV32-NEXT:    vsext.vf4 v8, v9
+; RV32-NEXT:    ret
 ;
-; RV32LMULMAX2-LABEL: stepvector_v2i64:
-; RV32LMULMAX2:       # %bb.0:
-; RV32LMULMAX2-NEXT:    lui a0, 16
-; RV32LMULMAX2-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32LMULMAX2-NEXT:    vmv.s.x v9, a0
-; RV32LMULMAX2-NEXT:    vsext.vf4 v8, v9
-; RV32LMULMAX2-NEXT:    ret
-;
-; RV64LMULMAX2-LABEL: stepvector_v2i64:
-; RV64LMULMAX2:       # %bb.0:
-; RV64LMULMAX2-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64LMULMAX2-NEXT:    vid.v v8
-; RV64LMULMAX2-NEXT:    ret
+; RV64-LABEL: stepvector_v2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vid.v v8
+; RV64-NEXT:    ret
   %v = call <2 x i64> @llvm.experimental.stepvector.v2i64()
   ret <2 x i64> %v
 }
@@ -221,39 +181,20 @@ define <2 x i64> @stepvector_v2i64() {
 declare <4 x i64> @llvm.experimental.stepvector.v4i64()
 
 define <4 x i64> @stepvector_v4i64() {
-; RV32LMULMAX1-LABEL: stepvector_v4i64:
-; RV32LMULMAX1:       # %bb.0:
-; RV32LMULMAX1-NEXT:    lui a0, 16
-; RV32LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32LMULMAX1-NEXT:    vmv.s.x v9, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v8, v9
-; RV32LMULMAX1-NEXT:    lui a0, 48
-; RV32LMULMAX1-NEXT:    addi a0, a0, 2
-; RV32LMULMAX1-NEXT:    vmv.s.x v10, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v9, v10
-; RV32LMULMAX1-NEXT:    ret
-;
-; RV64LMULMAX1-LABEL: stepvector_v4i64:
-; RV64LMULMAX1:       # %bb.0:
-; RV64LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64LMULMAX1-NEXT:    vid.v v8
-; RV64LMULMAX1-NEXT:    vadd.vi v9, v8, 2
-; RV64LMULMAX1-NEXT:    ret
+; RV32-LABEL: stepvector_v4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI14_0)
+; RV32-NEXT:    addi a0, a0, %lo(.LCPI14_0)
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    vle8.v v10, (a0)
+; RV32-NEXT:    vsext.vf4 v8, v10
+; RV32-NEXT:    ret
 ;
-; RV32LMULMAX2-LABEL: stepvector_v4i64:
-; RV32LMULMAX2:       # %bb.0:
-; RV32LMULMAX2-NEXT:    lui a0, %hi(.LCPI14_0)
-; RV32LMULMAX2-NEXT:    addi a0, a0, %lo(.LCPI14_0)
-; RV32LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32LMULMAX2-NEXT:    vle8.v v10, (a0)
-; RV32LMULMAX2-NEXT:    vsext.vf4 v8, v10
-; RV32LMULMAX2-NEXT:    ret
-;
-; RV64LMULMAX2-LABEL: stepvector_v4i64:
-; RV64LMULMAX2:       # %bb.0:
-; RV64LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64LMULMAX2-NEXT:    vid.v v8
-; RV64LMULMAX2-NEXT:    ret
+; RV64-LABEL: stepvector_v4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vid.v v8
+; RV64-NEXT:    ret
   %v = call <4 x i64> @llvm.experimental.stepvector.v4i64()
   ret <4 x i64> %v
 }
@@ -261,54 +202,20 @@ define <4 x i64> @stepvector_v4i64() {
 declare <8 x i64> @llvm.experimental.stepvector.v8i64()
 
 define <8 x i64> @stepvector_v8i64() {
-; RV32LMULMAX1-LABEL: stepvector_v8i64:
-; RV32LMULMAX1:       # %bb.0:
-; RV32LMULMAX1-NEXT:    lui a0, 16
-; RV32LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32LMULMAX1-NEXT:    vmv.s.x v9, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v8, v9
-; RV32LMULMAX1-NEXT:    lui a0, 48
-; RV32LMULMAX1-NEXT:    addi a0, a0, 2
-; RV32LMULMAX1-NEXT:    vmv.s.x v10, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v9, v10
-; RV32LMULMAX1-NEXT:    lui a0, 80
-; RV32LMULMAX1-NEXT:    addi a0, a0, 4
-; RV32LMULMAX1-NEXT:    vmv.s.x v11, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v10, v11
-; RV32LMULMAX1-NEXT:    lui a0, 112
-; RV32LMULMAX1-NEXT:    addi a0, a0, 6
-; RV32LMULMAX1-NEXT:    vmv.s.x v12, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v11, v12
-; RV32LMULMAX1-NEXT:    ret
-;
-; RV64LMULMAX1-LABEL: stepvector_v8i64:
-; RV64LMULMAX1:       # %bb.0:
-; RV64LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64LMULMAX1-NEXT:    vid.v v8
-; RV64LMULMAX1-NEXT:    vadd.vi v9, v8, 2
-; RV64LMULMAX1-NEXT:    vadd.vi v10, v8, 4
-; RV64LMULMAX1-NEXT:    vadd.vi v11, v8, 6
-; RV64LMULMAX1-NEXT:    ret
-;
-; RV32LMULMAX2-LABEL: stepvector_v8i64:
-; RV32LMULMAX2:       # %bb.0:
-; RV32LMULMAX2-NEXT:    lui a0, %hi(.LCPI15_0)
-; RV32LMULMAX2-NEXT:    addi a0, a0, %lo(.LCPI15_0)
-; RV32LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32LMULMAX2-NEXT:    vle8.v v10, (a0)
-; RV32LMULMAX2-NEXT:    lui a0, %hi(.LCPI15_1)
-; RV32LMULMAX2-NEXT:    addi a0, a0, %lo(.LCPI15_1)
-; RV32LMULMAX2-NEXT:    vle8.v v12, (a0)
-; RV32LMULMAX2-NEXT:    vsext.vf4 v8, v10
-; RV32LMULMAX2-NEXT:    vsext.vf4 v10, v12
-; RV32LMULMAX2-NEXT:    ret
+; RV32-LABEL: stepvector_v8i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI15_0)
+; RV32-NEXT:    addi a0, a0, %lo(.LCPI15_0)
+; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT:    vle8.v v12, (a0)
+; RV32-NEXT:    vsext.vf4 v8, v12
+; RV32-NEXT:    ret
 ;
-; RV64LMULMAX2-LABEL: stepvector_v8i64:
-; RV64LMULMAX2:       # %bb.0:
-; RV64LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64LMULMAX2-NEXT:    vid.v v8
-; RV64LMULMAX2-NEXT:    vadd.vi v10, v8, 4
-; RV64LMULMAX2-NEXT:    ret
+; RV64-LABEL: stepvector_v8i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vid.v v8
+; RV64-NEXT:    ret
   %v = call <8 x i64> @llvm.experimental.stepvector.v8i64()
   ret <8 x i64> %v
 }
@@ -316,84 +223,21 @@ define <8 x i64> @stepvector_v8i64() {
 declare <16 x i64> @llvm.experimental.stepvector.v16i64()
 
 define <16 x i64> @stepvector_v16i64() {
-; RV32LMULMAX1-LABEL: stepvector_v16i64:
-; RV32LMULMAX1:       # %bb.0:
-; RV32LMULMAX1-NEXT:    lui a0, 16
-; RV32LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32LMULMAX1-NEXT:    vmv.s.x v9, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v8, v9
-; RV32LMULMAX1-NEXT:    lui a0, 48
-; RV32LMULMAX1-NEXT:    addi a0, a0, 2
-; RV32LMULMAX1-NEXT:    vmv.s.x v10, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v9, v10
-; RV32LMULMAX1-NEXT:    lui a0, 80
-; RV32LMULMAX1-NEXT:    addi a0, a0, 4
-; RV32LMULMAX1-NEXT:    vmv.s.x v11, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v10, v11
-; RV32LMULMAX1-NEXT:    lui a0, 112
-; RV32LMULMAX1-NEXT:    addi a0, a0, 6
-; RV32LMULMAX1-NEXT:    vmv.s.x v12, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v11, v12
-; RV32LMULMAX1-NEXT:    lui a0, 144
-; RV32LMULMAX1-NEXT:    addi a0, a0, 8
-; RV32LMULMAX1-NEXT:    vmv.s.x v13, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v12, v13
-; RV32LMULMAX1-NEXT:    lui a0, 176
-; RV32LMULMAX1-NEXT:    addi a0, a0, 10
-; RV32LMULMAX1-NEXT:    vmv.s.x v14, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v13, v14
-; RV32LMULMAX1-NEXT:    lui a0, 208
-; RV32LMULMAX1-NEXT:    addi a0, a0, 12
-; RV32LMULMAX1-NEXT:    vmv.s.x v15, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v14, v15
-; RV32LMULMAX1-NEXT:    lui a0, 240
-; RV32LMULMAX1-NEXT:    addi a0, a0, 14
-; RV32LMULMAX1-NEXT:    vmv.s.x v16, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v15, v16
-; RV32LMULMAX1-NEXT:    ret
-;
-; RV64LMULMAX1-LABEL: stepvector_v16i64:
-; RV64LMULMAX1:       # %bb.0:
-; RV64LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64LMULMAX1-NEXT:    vid.v v8
-; RV64LMULMAX1-NEXT:    vadd.vi v9, v8, 2
-; RV64LMULMAX1-NEXT:    vadd.vi v10, v8, 4
-; RV64LMULMAX1-NEXT:    vadd.vi v11, v8, 6
-; RV64LMULMAX1-NEXT:    vadd.vi v12, v8, 8
-; RV64LMULMAX1-NEXT:    vadd.vi v13, v8, 10
-; RV64LMULMAX1-NEXT:    vadd.vi v14, v8, 12
-; RV64LMULMAX1-NEXT:    vadd.vi v15, v8, 14
-; RV64LMULMAX1-NEXT:    ret
-;
-; RV32LMULMAX2-LABEL: stepvector_v16i64:
-; RV32LMULMAX2:       # %bb.0:
-; RV32LMULMAX2-NEXT:    lui a0, %hi(.LCPI16_0)
-; RV32LMULMAX2-NEXT:    addi a0, a0, %lo(.LCPI16_0)
-; RV32LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32LMULMAX2-NEXT:    vle8.v v10, (a0)
-; RV32LMULMAX2-NEXT:    lui a0, %hi(.LCPI16_1)
-; RV32LMULMAX2-NEXT:    addi a0, a0, %lo(.LCPI16_1)
-; RV32LMULMAX2-NEXT:    vle8.v v12, (a0)
-; RV32LMULMAX2-NEXT:    lui a0, %hi(.LCPI16_2)
-; RV32LMULMAX2-NEXT:    addi a0, a0, %lo(.LCPI16_2)
-; RV32LMULMAX2-NEXT:    vle8.v v14, (a0)
-; RV32LMULMAX2-NEXT:    lui a0, %hi(.LCPI16_3)
-; RV32LMULMAX2-NEXT:    addi a0, a0, %lo(.LCPI16_3)
-; RV32LMULMAX2-NEXT:    vle8.v v16, (a0)
-; RV32LMULMAX2-NEXT:    vsext.vf4 v8, v10
-; RV32LMULMAX2-NEXT:    vsext.vf4 v10, v12
-; RV32LMULMAX2-NEXT:    vsext.vf4 v12, v14
-; RV32LMULMAX2-NEXT:    vsext.vf4 v14, v16
-; RV32LMULMAX2-NEXT:    ret
+; RV32-LABEL: stepvector_v16i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, %hi(.LCPI16_0)
+; RV32-NEXT:    addi a0, a0, %lo(.LCPI16_0)
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT:    vle8.v v16, (a0)
+; RV32-NEXT:    vsext.vf4 v8, v16
+; RV32-NEXT:    ret
 ;
-; RV64LMULMAX2-LABEL: stepvector_v16i64:
-; RV64LMULMAX2:       # %bb.0:
-; RV64LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64LMULMAX2-NEXT:    vid.v v8
-; RV64LMULMAX2-NEXT:    vadd.vi v10, v8, 4
-; RV64LMULMAX2-NEXT:    vadd.vi v12, v8, 8
-; RV64LMULMAX2-NEXT:    vadd.vi v14, v8, 12
-; RV64LMULMAX2-NEXT:    ret
+; RV64-LABEL: stepvector_v16i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vid.v v8
+; RV64-NEXT:    ret
   %v = call <16 x i64> @llvm.experimental.stepvector.v16i64()
   ret <16 x i64> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
index b18e235bb97650..44d4a8a1e04cda 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
 
 declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1>)
 
@@ -474,21 +472,13 @@ define zeroext i1 @vreduce_smin_v16i1(<16 x i1> %v) {
 declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1>)
 
 define zeroext i1 @vreduce_or_v32i1(<32 x i1> %v) {
-; LMULMAX1-LABEL: vreduce_or_v32i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmor.mm v8, v0, v8
-; LMULMAX1-NEXT:    vcpop.m a0, v8
-; LMULMAX1-NEXT:    snez a0, a0
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: vreduce_or_v32i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 32
-; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; LMULMAX8-NEXT:    vcpop.m a0, v0
-; LMULMAX8-NEXT:    snez a0, a0
-; LMULMAX8-NEXT:    ret
+; CHECK-LABEL: vreduce_or_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %v)
   ret i1 %red
 }
@@ -496,21 +486,13 @@ define zeroext i1 @vreduce_or_v32i1(<32 x i1> %v) {
 declare i1 @llvm.vector.reduce.xor.v32i1(<32 x i1>)
 
 define zeroext i1 @vreduce_xor_v32i1(<32 x i1> %v) {
-; LMULMAX1-LABEL: vreduce_xor_v32i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmxor.mm v8, v0, v8
-; LMULMAX1-NEXT:    vcpop.m a0, v8
-; LMULMAX1-NEXT:    andi a0, a0, 1
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: vreduce_xor_v32i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 32
-; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; LMULMAX8-NEXT:    vcpop.m a0, v0
-; LMULMAX8-NEXT:    andi a0, a0, 1
-; LMULMAX8-NEXT:    ret
+; CHECK-LABEL: vreduce_xor_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %v)
   ret i1 %red
 }
@@ -518,22 +500,14 @@ define zeroext i1 @vreduce_xor_v32i1(<32 x i1> %v) {
 declare i1 @llvm.vector.reduce.and.v32i1(<32 x i1>)
 
 define zeroext i1 @vreduce_and_v32i1(<32 x i1> %v) {
-; LMULMAX1-LABEL: vreduce_and_v32i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmnand.mm v8, v0, v8
-; LMULMAX1-NEXT:    vcpop.m a0, v8
-; LMULMAX1-NEXT:    seqz a0, a0
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: vreduce_and_v32i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 32
-; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; LMULMAX8-NEXT:    vmnot.m v8, v0
-; LMULMAX8-NEXT:    vcpop.m a0, v8
-; LMULMAX8-NEXT:    seqz a0, a0
-; LMULMAX8-NEXT:    ret
+; CHECK-LABEL: vreduce_and_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %v)
   ret i1 %red
 }
@@ -541,21 +515,13 @@ define zeroext i1 @vreduce_and_v32i1(<32 x i1> %v) {
 declare i1 @llvm.vector.reduce.umax.v32i1(<32 x i1>)
 
 define zeroext i1 @vreduce_umax_v32i1(<32 x i1> %v) {
-; LMULMAX1-LABEL: vreduce_umax_v32i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmor.mm v8, v0, v8
-; LMULMAX1-NEXT:    vcpop.m a0, v8
-; LMULMAX1-NEXT:    snez a0, a0
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: vreduce_umax_v32i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 32
-; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; LMULMAX8-NEXT:    vcpop.m a0, v0
-; LMULMAX8-NEXT:    snez a0, a0
-; LMULMAX8-NEXT:    ret
+; CHECK-LABEL: vreduce_umax_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> %v)
   ret i1 %red
 }
@@ -563,22 +529,14 @@ define zeroext i1 @vreduce_umax_v32i1(<32 x i1> %v) {
 declare i1 @llvm.vector.reduce.smax.v32i1(<32 x i1>)
 
 define zeroext i1 @vreduce_smax_v32i1(<32 x i1> %v) {
-; LMULMAX1-LABEL: vreduce_smax_v32i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmnand.mm v8, v0, v8
-; LMULMAX1-NEXT:    vcpop.m a0, v8
-; LMULMAX1-NEXT:    seqz a0, a0
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: vreduce_smax_v32i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 32
-; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; LMULMAX8-NEXT:    vmnot.m v8, v0
-; LMULMAX8-NEXT:    vcpop.m a0, v8
-; LMULMAX8-NEXT:    seqz a0, a0
-; LMULMAX8-NEXT:    ret
+; CHECK-LABEL: vreduce_smax_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> %v)
   ret i1 %red
 }
@@ -586,22 +544,14 @@ define zeroext i1 @vreduce_smax_v32i1(<32 x i1> %v) {
 declare i1 @llvm.vector.reduce.umin.v32i1(<32 x i1>)
 
 define zeroext i1 @vreduce_umin_v32i1(<32 x i1> %v) {
-; LMULMAX1-LABEL: vreduce_umin_v32i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmnand.mm v8, v0, v8
-; LMULMAX1-NEXT:    vcpop.m a0, v8
-; LMULMAX1-NEXT:    seqz a0, a0
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: vreduce_umin_v32i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 32
-; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; LMULMAX8-NEXT:    vmnot.m v8, v0
-; LMULMAX8-NEXT:    vcpop.m a0, v8
-; LMULMAX8-NEXT:    seqz a0, a0
-; LMULMAX8-NEXT:    ret
+; CHECK-LABEL: vreduce_umin_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> %v)
   ret i1 %red
 }
@@ -609,21 +559,13 @@ define zeroext i1 @vreduce_umin_v32i1(<32 x i1> %v) {
 declare i1 @llvm.vector.reduce.smin.v32i1(<32 x i1>)
 
 define zeroext i1 @vreduce_smin_v32i1(<32 x i1> %v) {
-; LMULMAX1-LABEL: vreduce_smin_v32i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmor.mm v8, v0, v8
-; LMULMAX1-NEXT:    vcpop.m a0, v8
-; LMULMAX1-NEXT:    snez a0, a0
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: vreduce_smin_v32i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 32
-; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; LMULMAX8-NEXT:    vcpop.m a0, v0
-; LMULMAX8-NEXT:    snez a0, a0
-; LMULMAX8-NEXT:    ret
+; CHECK-LABEL: vreduce_smin_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> %v)
   ret i1 %red
 }
@@ -631,23 +573,13 @@ define zeroext i1 @vreduce_smin_v32i1(<32 x i1> %v) {
 declare i1 @llvm.vector.reduce.or.v64i1(<64 x i1>)
 
 define zeroext i1 @vreduce_or_v64i1(<64 x i1> %v) {
-; LMULMAX1-LABEL: vreduce_or_v64i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmor.mm v8, v8, v10
-; LMULMAX1-NEXT:    vmor.mm v9, v0, v9
-; LMULMAX1-NEXT:    vmor.mm v8, v9, v8
-; LMULMAX1-NEXT:    vcpop.m a0, v8
-; LMULMAX1-NEXT:    snez a0, a0
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: vreduce_or_v64i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 64
-; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; LMULMAX8-NEXT:    vcpop.m a0, v0
-; LMULMAX8-NEXT:    snez a0, a0
-; LMULMAX8-NEXT:    ret
+; CHECK-LABEL: vreduce_or_v64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> %v)
   ret i1 %red
 }
@@ -655,23 +587,13 @@ define zeroext i1 @vreduce_or_v64i1(<64 x i1> %v) {
 declare i1 @llvm.vector.reduce.xor.v64i1(<64 x i1>)
 
 define zeroext i1 @vreduce_xor_v64i1(<64 x i1> %v) {
-; LMULMAX1-LABEL: vreduce_xor_v64i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmxor.mm v8, v8, v10
-; LMULMAX1-NEXT:    vmxor.mm v9, v0, v9
-; LMULMAX1-NEXT:    vmxor.mm v8, v9, v8
-; LMULMAX1-NEXT:    vcpop.m a0, v8
-; LMULMAX1-NEXT:    andi a0, a0, 1
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: vreduce_xor_v64i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 64
-; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; LMULMAX8-NEXT:    vcpop.m a0, v0
-; LMULMAX8-NEXT:    andi a0, a0, 1
-; LMULMAX8-NEXT:    ret
+; CHECK-LABEL: vreduce_xor_v64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %v)
   ret i1 %red
 }
@@ -679,24 +601,14 @@ define zeroext i1 @vreduce_xor_v64i1(<64 x i1> %v) {
 declare i1 @llvm.vector.reduce.and.v64i1(<64 x i1>)
 
 define zeroext i1 @vreduce_and_v64i1(<64 x i1> %v) {
-; LMULMAX1-LABEL: vreduce_and_v64i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmand.mm v8, v8, v10
-; LMULMAX1-NEXT:    vmand.mm v9, v0, v9
-; LMULMAX1-NEXT:    vmnand.mm v8, v9, v8
-; LMULMAX1-NEXT:    vcpop.m a0, v8
-; LMULMAX1-NEXT:    seqz a0, a0
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: vreduce_and_v64i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 64
-; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; LMULMAX8-NEXT:    vmnot.m v8, v0
-; LMULMAX8-NEXT:    vcpop.m a0, v8
-; LMULMAX8-NEXT:    seqz a0, a0
-; LMULMAX8-NEXT:    ret
+; CHECK-LABEL: vreduce_and_v64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %v)
   ret i1 %red
 }
@@ -704,23 +616,13 @@ define zeroext i1 @vreduce_and_v64i1(<64 x i1> %v) {
 declare i1 @llvm.vector.reduce.umax.v64i1(<64 x i1>)
 
 define zeroext i1 @vreduce_umax_v64i1(<64 x i1> %v) {
-; LMULMAX1-LABEL: vreduce_umax_v64i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmor.mm v8, v8, v10
-; LMULMAX1-NEXT:    vmor.mm v9, v0, v9
-; LMULMAX1-NEXT:    vmor.mm v8, v9, v8
-; LMULMAX1-NEXT:    vcpop.m a0, v8
-; LMULMAX1-NEXT:    snez a0, a0
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: vreduce_umax_v64i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 64
-; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; LMULMAX8-NEXT:    vcpop.m a0, v0
-; LMULMAX8-NEXT:    snez a0, a0
-; LMULMAX8-NEXT:    ret
+; CHECK-LABEL: vreduce_umax_v64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> %v)
   ret i1 %red
 }
@@ -728,24 +630,14 @@ define zeroext i1 @vreduce_umax_v64i1(<64 x i1> %v) {
 declare i1 @llvm.vector.reduce.smax.v64i1(<64 x i1>)
 
 define zeroext i1 @vreduce_smax_v64i1(<64 x i1> %v) {
-; LMULMAX1-LABEL: vreduce_smax_v64i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmand.mm v8, v8, v10
-; LMULMAX1-NEXT:    vmand.mm v9, v0, v9
-; LMULMAX1-NEXT:    vmnand.mm v8, v9, v8
-; LMULMAX1-NEXT:    vcpop.m a0, v8
-; LMULMAX1-NEXT:    seqz a0, a0
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: vreduce_smax_v64i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 64
-; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; LMULMAX8-NEXT:    vmnot.m v8, v0
-; LMULMAX8-NEXT:    vcpop.m a0, v8
-; LMULMAX8-NEXT:    seqz a0, a0
-; LMULMAX8-NEXT:    ret
+; CHECK-LABEL: vreduce_smax_v64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> %v)
   ret i1 %red
 }
@@ -753,24 +645,14 @@ define zeroext i1 @vreduce_smax_v64i1(<64 x i1> %v) {
 declare i1 @llvm.vector.reduce.umin.v64i1(<64 x i1>)
 
 define zeroext i1 @vreduce_umin_v64i1(<64 x i1> %v) {
-; LMULMAX1-LABEL: vreduce_umin_v64i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmand.mm v8, v8, v10
-; LMULMAX1-NEXT:    vmand.mm v9, v0, v9
-; LMULMAX1-NEXT:    vmnand.mm v8, v9, v8
-; LMULMAX1-NEXT:    vcpop.m a0, v8
-; LMULMAX1-NEXT:    seqz a0, a0
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: vreduce_umin_v64i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 64
-; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; LMULMAX8-NEXT:    vmnot.m v8, v0
-; LMULMAX8-NEXT:    vcpop.m a0, v8
-; LMULMAX8-NEXT:    seqz a0, a0
-; LMULMAX8-NEXT:    ret
+; CHECK-LABEL: vreduce_umin_v64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> %v)
   ret i1 %red
 }
@@ -778,23 +660,13 @@ define zeroext i1 @vreduce_umin_v64i1(<64 x i1> %v) {
 declare i1 @llvm.vector.reduce.smin.v64i1(<64 x i1>)
 
 define zeroext i1 @vreduce_smin_v64i1(<64 x i1> %v) {
-; LMULMAX1-LABEL: vreduce_smin_v64i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmor.mm v8, v8, v10
-; LMULMAX1-NEXT:    vmor.mm v9, v0, v9
-; LMULMAX1-NEXT:    vmor.mm v8, v9, v8
-; LMULMAX1-NEXT:    vcpop.m a0, v8
-; LMULMAX1-NEXT:    snez a0, a0
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: vreduce_smin_v64i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 64
-; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; LMULMAX8-NEXT:    vcpop.m a0, v0
-; LMULMAX8-NEXT:    snez a0, a0
-; LMULMAX8-NEXT:    ret
+; CHECK-LABEL: vreduce_smin_v64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> %v)
   ret i1 %red
 }
@@ -867,21 +739,13 @@ define zeroext i1 @vreduce_add_v16i1(<16 x i1> %v) {
 declare i1 @llvm.vector.reduce.add.v32i1(<32 x i1>)
 
 define zeroext i1 @vreduce_add_v32i1(<32 x i1> %v) {
-; LMULMAX1-LABEL: vreduce_add_v32i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmxor.mm v8, v0, v8
-; LMULMAX1-NEXT:    vcpop.m a0, v8
-; LMULMAX1-NEXT:    andi a0, a0, 1
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: vreduce_add_v32i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 32
-; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; LMULMAX8-NEXT:    vcpop.m a0, v0
-; LMULMAX8-NEXT:    andi a0, a0, 1
-; LMULMAX8-NEXT:    ret
+; CHECK-LABEL: vreduce_add_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> %v)
   ret i1 %red
 }
@@ -889,23 +753,13 @@ define zeroext i1 @vreduce_add_v32i1(<32 x i1> %v) {
 declare i1 @llvm.vector.reduce.add.v64i1(<64 x i1>)
 
 define zeroext i1 @vreduce_add_v64i1(<64 x i1> %v) {
-; LMULMAX1-LABEL: vreduce_add_v64i1:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-NEXT:    vmxor.mm v8, v8, v10
-; LMULMAX1-NEXT:    vmxor.mm v9, v0, v9
-; LMULMAX1-NEXT:    vmxor.mm v8, v9, v8
-; LMULMAX1-NEXT:    vcpop.m a0, v8
-; LMULMAX1-NEXT:    andi a0, a0, 1
-; LMULMAX1-NEXT:    ret
-;
-; LMULMAX8-LABEL: vreduce_add_v64i1:
-; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    li a0, 64
-; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; LMULMAX8-NEXT:    vcpop.m a0, v0
-; LMULMAX8-NEXT:    andi a0, a0, 1
-; LMULMAX8-NEXT:    ret
+; CHECK-LABEL: vreduce_add_v64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    ret
   %red = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> %v)
   ret i1 %red
 }


