[llvm] 0ff19b1 - [RISCV][NFC] Add some check prefixes to remove redundant checks in some IR tests

Lian Wang via llvm-commits <llvm-commits at lists.llvm.org>
Tue Mar 22 01:17:27 PDT 2022


Author: Lian Wang
Date: 2022-03-22T08:14:08Z
New Revision: 0ff19b190569b591b996a884ec42304c308aedbb

URL: https://github.com/llvm/llvm-project/commit/0ff19b190569b591b996a884ec42304c308aedbb
DIFF: https://github.com/llvm/llvm-project/commit/0ff19b190569b591b996a884ec42304c308aedbb.diff

LOG: [RISCV][NFC] Add some check prefixes to remove redundant checks in some IR tests

Reviewed By: frasercrmck, jacquesguan

Differential Revision: https://reviews.llvm.org/D122211

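Background on the mechanism, for readers unfamiliar with shared check prefixes: when several RUN lines produce identical output for a function, utils/update_llc_test_checks.py emits one check block under a prefix common to those RUN lines instead of one block per configuration. Adding the shared prefixes here (CHECK, RV32/RV64, LMULMAX1/LMULMAX2/LMULMAX8) lets the script deduplicate the bodies in the diff below. A minimal sketch of the pattern, with the llc flags elided:

; RUN: llc -mtriple=riscv32 ... < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 ... < %s | FileCheck %s --check-prefixes=CHECK,RV64

Output that matches across both runs is checked once under CHECK; output that diverges falls back to the per-run prefixes, e.g. in bswap_v4i32 below, RV32 materializes the mask with addi while RV64 uses addiw, so those bodies keep separate RV32/RV64 blocks.
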
Added: 
    

Modified: 
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
index b02c4f6383710..33ac1b42a8465 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
@@ -1,49 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1-RV64
 
 define void @bswap_v8i16(<8 x i16>* %x, <8 x i16>* %y) {
-; LMULMAX2-RV32-LABEL: bswap_v8i16:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX2-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX2-RV32-NEXT:    vsll.vi v8, v8, 8
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: bswap_v8i16:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX2-RV64-NEXT:    vsll.vi v8, v8, 8
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: bswap_v8i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX1-RV32-NEXT:    vsll.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: bswap_v8i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: bswap_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsrl.vi v9, v8, 8
+; CHECK-NEXT:    vsll.vi v8, v8, 8
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <8 x i16>, <8 x i16>* %x
   %b = load <8 x i16>, <8 x i16>* %y
   %c = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a)
@@ -53,81 +23,43 @@ define void @bswap_v8i16(<8 x i16>* %x, <8 x i16>* %y) {
 declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
 
 define void @bswap_v4i32(<4 x i32>* %x, <4 x i32>* %y) {
-; LMULMAX2-RV32-LABEL: bswap_v4i32:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX2-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX2-RV32-NEXT:    lui a1, 16
-; LMULMAX2-RV32-NEXT:    addi a1, a1, -256
-; LMULMAX2-RV32-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 24
-; LMULMAX2-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX2-RV32-NEXT:    vsll.vi v10, v8, 8
-; LMULMAX2-RV32-NEXT:    lui a1, 4080
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT:    vsll.vi v8, v8, 24
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
+; RV32-LABEL: bswap_v4i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    vsrl.vi v9, v8, 8
+; RV32-NEXT:    lui a1, 16
+; RV32-NEXT:    addi a1, a1, -256
+; RV32-NEXT:    vand.vx v9, v9, a1
+; RV32-NEXT:    vsrl.vi v10, v8, 24
+; RV32-NEXT:    vor.vv v9, v9, v10
+; RV32-NEXT:    vsll.vi v10, v8, 8
+; RV32-NEXT:    lui a1, 4080
+; RV32-NEXT:    vand.vx v10, v10, a1
+; RV32-NEXT:    vsll.vi v8, v8, 24
+; RV32-NEXT:    vor.vv v8, v8, v10
+; RV32-NEXT:    vor.vv v8, v8, v9
+; RV32-NEXT:    vse32.v v8, (a0)
+; RV32-NEXT:    ret
 ;
-; LMULMAX2-RV64-LABEL: bswap_v4i32:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX2-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX2-RV64-NEXT:    lui a1, 16
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -256
-; LMULMAX2-RV64-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 24
-; LMULMAX2-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX2-RV64-NEXT:    vsll.vi v10, v8, 8
-; LMULMAX2-RV64-NEXT:    lui a1, 4080
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vsll.vi v8, v8, 24
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: bswap_v4i32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX1-RV32-NEXT:    lui a1, 16
-; LMULMAX1-RV32-NEXT:    addi a1, a1, -256
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 24
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsll.vi v10, v8, 8
-; LMULMAX1-RV32-NEXT:    lui a1, 4080
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a1
-; LMULMAX1-RV32-NEXT:    vsll.vi v8, v8, 24
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: bswap_v4i32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 8
-; LMULMAX1-RV64-NEXT:    lui a1, 16
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, -256
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 24
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vsll.vi v10, v8, 8
-; LMULMAX1-RV64-NEXT:    lui a1, 4080
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 24
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; RV64-LABEL: bswap_v4i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; RV64-NEXT:    vle32.v v8, (a0)
+; RV64-NEXT:    vsrl.vi v9, v8, 8
+; RV64-NEXT:    lui a1, 16
+; RV64-NEXT:    addiw a1, a1, -256
+; RV64-NEXT:    vand.vx v9, v9, a1
+; RV64-NEXT:    vsrl.vi v10, v8, 24
+; RV64-NEXT:    vor.vv v9, v9, v10
+; RV64-NEXT:    vsll.vi v10, v8, 8
+; RV64-NEXT:    lui a1, 4080
+; RV64-NEXT:    vand.vx v10, v10, a1
+; RV64-NEXT:    vsll.vi v8, v8, 24
+; RV64-NEXT:    vor.vv v8, v8, v10
+; RV64-NEXT:    vor.vv v8, v8, v9
+; RV64-NEXT:    vse32.v v8, (a0)
+; RV64-NEXT:    ret
   %a = load <4 x i32>, <4 x i32>* %x
   %b = load <4 x i32>, <4 x i32>* %y
   %c = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a)
@@ -137,189 +69,97 @@ define void @bswap_v4i32(<4 x i32>* %x, <4 x i32>* %y) {
 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
 
 define void @bswap_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
-; LMULMAX2-RV32-LABEL: bswap_v2i64:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    li a1, 56
-; LMULMAX2-RV32-NEXT:    vsrl.vx v9, v8, a1
-; LMULMAX2-RV32-NEXT:    li a2, 40
-; LMULMAX2-RV32-NEXT:    vsrl.vx v10, v8, a2
-; LMULMAX2-RV32-NEXT:    lui a3, 16
-; LMULMAX2-RV32-NEXT:    addi a3, a3, -256
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v10, a3
-; LMULMAX2-RV32-NEXT:    vor.vv v9, v10, v9
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 24
-; LMULMAX2-RV32-NEXT:    lui a4, 4080
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v10, a4
-; LMULMAX2-RV32-NEXT:    li a5, 5
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
-; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a5
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX2-RV32-NEXT:    vmv.v.i v11, 0
-; LMULMAX2-RV32-NEXT:    lui a5, 1044480
-; LMULMAX2-RV32-NEXT:    vmerge.vxm v11, v11, a5, v0
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX2-RV32-NEXT:    vsrl.vi v12, v8, 8
-; LMULMAX2-RV32-NEXT:    vand.vv v11, v12, v11
-; LMULMAX2-RV32-NEXT:    vor.vv v10, v11, v10
-; LMULMAX2-RV32-NEXT:    vor.vv v9, v10, v9
-; LMULMAX2-RV32-NEXT:    li a5, 255
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a5
-; LMULMAX2-RV32-NEXT:    vmerge.vim v10, v10, 0, v0
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX2-RV32-NEXT:    vsll.vi v11, v8, 8
-; LMULMAX2-RV32-NEXT:    vand.vv v10, v11, v10
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v11, a3
-; LMULMAX2-RV32-NEXT:    vmerge.vim v11, v11, 0, v0
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX2-RV32-NEXT:    vsll.vi v12, v8, 24
-; LMULMAX2-RV32-NEXT:    vand.vv v11, v12, v11
-; LMULMAX2-RV32-NEXT:    vor.vv v10, v11, v10
-; LMULMAX2-RV32-NEXT:    vsll.vx v11, v8, a2
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX2-RV32-NEXT:    vmv.v.x v12, a4
-; LMULMAX2-RV32-NEXT:    vmerge.vim v12, v12, 0, v0
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX2-RV32-NEXT:    vand.vv v11, v11, v12
-; LMULMAX2-RV32-NEXT:    vsll.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v11
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: bswap_v2i64:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    li a1, 56
-; LMULMAX2-RV64-NEXT:    vsrl.vx v9, v8, a1
-; LMULMAX2-RV64-NEXT:    li a2, 40
-; LMULMAX2-RV64-NEXT:    vsrl.vx v10, v8, a2
-; LMULMAX2-RV64-NEXT:    lui a3, 16
-; LMULMAX2-RV64-NEXT:    addiw a3, a3, -256
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a3
-; LMULMAX2-RV64-NEXT:    vor.vv v9, v10, v9
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 24
-; LMULMAX2-RV64-NEXT:    lui a3, 4080
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a3
-; LMULMAX2-RV64-NEXT:    vsrl.vi v11, v8, 8
-; LMULMAX2-RV64-NEXT:    li a3, 255
-; LMULMAX2-RV64-NEXT:    slli a4, a3, 24
-; LMULMAX2-RV64-NEXT:    vand.vx v11, v11, a4
-; LMULMAX2-RV64-NEXT:    vor.vv v10, v11, v10
-; LMULMAX2-RV64-NEXT:    vor.vv v9, v10, v9
-; LMULMAX2-RV64-NEXT:    vsll.vi v10, v8, 8
-; LMULMAX2-RV64-NEXT:    slli a4, a3, 32
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a4
-; LMULMAX2-RV64-NEXT:    vsll.vi v11, v8, 24
-; LMULMAX2-RV64-NEXT:    slli a4, a3, 40
-; LMULMAX2-RV64-NEXT:    vand.vx v11, v11, a4
-; LMULMAX2-RV64-NEXT:    vor.vv v10, v11, v10
-; LMULMAX2-RV64-NEXT:    vsll.vx v11, v8, a1
-; LMULMAX2-RV64-NEXT:    vsll.vx v8, v8, a2
-; LMULMAX2-RV64-NEXT:    slli a1, a3, 48
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v11, v8
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: bswap_v2i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    li a1, 56
-; LMULMAX1-RV32-NEXT:    vsrl.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT:    li a2, 40
-; LMULMAX1-RV32-NEXT:    vsrl.vx v10, v8, a2
-; LMULMAX1-RV32-NEXT:    lui a3, 16
-; LMULMAX1-RV32-NEXT:    addi a3, a3, -256
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 24
-; LMULMAX1-RV32-NEXT:    lui a4, 4080
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a4
-; LMULMAX1-RV32-NEXT:    li a5, 5
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
-; LMULMAX1-RV32-NEXT:    vmv.s.x v0, a5
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vmv.v.i v11, 0
-; LMULMAX1-RV32-NEXT:    lui a5, 1044480
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v11, v11, a5, v0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vsrl.vi v12, v8, 8
-; LMULMAX1-RV32-NEXT:    vand.vv v11, v12, v11
-; LMULMAX1-RV32-NEXT:    vor.vv v10, v11, v10
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    li a5, 255
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v10, a5
-; LMULMAX1-RV32-NEXT:    vmerge.vim v10, v10, 0, v0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vsll.vi v11, v8, 8
-; LMULMAX1-RV32-NEXT:    vand.vv v10, v11, v10
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v11, a3
-; LMULMAX1-RV32-NEXT:    vmerge.vim v11, v11, 0, v0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vsll.vi v12, v8, 24
-; LMULMAX1-RV32-NEXT:    vand.vv v11, v12, v11
-; LMULMAX1-RV32-NEXT:    vor.vv v10, v11, v10
-; LMULMAX1-RV32-NEXT:    vsll.vx v11, v8, a2
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v12, a4
-; LMULMAX1-RV32-NEXT:    vmerge.vim v12, v12, 0, v0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vand.vv v11, v11, v12
-; LMULMAX1-RV32-NEXT:    vsll.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
+; RV32-LABEL: bswap_v2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    li a1, 56
+; RV32-NEXT:    vsrl.vx v9, v8, a1
+; RV32-NEXT:    li a2, 40
+; RV32-NEXT:    vsrl.vx v10, v8, a2
+; RV32-NEXT:    lui a3, 16
+; RV32-NEXT:    addi a3, a3, -256
+; RV32-NEXT:    vand.vx v10, v10, a3
+; RV32-NEXT:    vor.vv v9, v10, v9
+; RV32-NEXT:    vsrl.vi v10, v8, 24
+; RV32-NEXT:    lui a4, 4080
+; RV32-NEXT:    vand.vx v10, v10, a4
+; RV32-NEXT:    li a5, 5
+; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV32-NEXT:    vmv.s.x v0, a5
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; RV32-NEXT:    vmv.v.i v11, 0
+; RV32-NEXT:    lui a5, 1044480
+; RV32-NEXT:    vmerge.vxm v11, v11, a5, v0
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; RV32-NEXT:    vsrl.vi v12, v8, 8
+; RV32-NEXT:    vand.vv v11, v12, v11
+; RV32-NEXT:    vor.vv v10, v11, v10
+; RV32-NEXT:    vor.vv v9, v10, v9
+; RV32-NEXT:    li a5, 255
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; RV32-NEXT:    vmv.v.x v10, a5
+; RV32-NEXT:    vmerge.vim v10, v10, 0, v0
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; RV32-NEXT:    vsll.vi v11, v8, 8
+; RV32-NEXT:    vand.vv v10, v11, v10
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; RV32-NEXT:    vmv.v.x v11, a3
+; RV32-NEXT:    vmerge.vim v11, v11, 0, v0
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; RV32-NEXT:    vsll.vi v12, v8, 24
+; RV32-NEXT:    vand.vv v11, v12, v11
+; RV32-NEXT:    vor.vv v10, v11, v10
+; RV32-NEXT:    vsll.vx v11, v8, a2
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; RV32-NEXT:    vmv.v.x v12, a4
+; RV32-NEXT:    vmerge.vim v12, v12, 0, v0
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; RV32-NEXT:    vand.vv v11, v11, v12
+; RV32-NEXT:    vsll.vx v8, v8, a1
+; RV32-NEXT:    vor.vv v8, v8, v11
+; RV32-NEXT:    vor.vv v8, v8, v10
+; RV32-NEXT:    vor.vv v8, v8, v9
+; RV32-NEXT:    vse64.v v8, (a0)
+; RV32-NEXT:    ret
 ;
-; LMULMAX1-RV64-LABEL: bswap_v2i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    li a1, 56
-; LMULMAX1-RV64-NEXT:    vsrl.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT:    li a2, 40
-; LMULMAX1-RV64-NEXT:    vsrl.vx v10, v8, a2
-; LMULMAX1-RV64-NEXT:    lui a3, 16
-; LMULMAX1-RV64-NEXT:    addiw a3, a3, -256
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 24
-; LMULMAX1-RV64-NEXT:    lui a3, 4080
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT:    vsrl.vi v11, v8, 8
-; LMULMAX1-RV64-NEXT:    li a3, 255
-; LMULMAX1-RV64-NEXT:    slli a4, a3, 24
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a4
-; LMULMAX1-RV64-NEXT:    vor.vv v10, v11, v10
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsll.vi v10, v8, 8
-; LMULMAX1-RV64-NEXT:    slli a4, a3, 32
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a4
-; LMULMAX1-RV64-NEXT:    vsll.vi v11, v8, 24
-; LMULMAX1-RV64-NEXT:    slli a4, a3, 40
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a4
-; LMULMAX1-RV64-NEXT:    vor.vv v10, v11, v10
-; LMULMAX1-RV64-NEXT:    vsll.vx v11, v8, a1
-; LMULMAX1-RV64-NEXT:    vsll.vx v8, v8, a2
-; LMULMAX1-RV64-NEXT:    slli a1, a3, 48
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v11, v8
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; RV64-LABEL: bswap_v2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    li a1, 56
+; RV64-NEXT:    vsrl.vx v9, v8, a1
+; RV64-NEXT:    li a2, 40
+; RV64-NEXT:    vsrl.vx v10, v8, a2
+; RV64-NEXT:    lui a3, 16
+; RV64-NEXT:    addiw a3, a3, -256
+; RV64-NEXT:    vand.vx v10, v10, a3
+; RV64-NEXT:    vor.vv v9, v10, v9
+; RV64-NEXT:    vsrl.vi v10, v8, 24
+; RV64-NEXT:    lui a3, 4080
+; RV64-NEXT:    vand.vx v10, v10, a3
+; RV64-NEXT:    vsrl.vi v11, v8, 8
+; RV64-NEXT:    li a3, 255
+; RV64-NEXT:    slli a4, a3, 24
+; RV64-NEXT:    vand.vx v11, v11, a4
+; RV64-NEXT:    vor.vv v10, v11, v10
+; RV64-NEXT:    vor.vv v9, v10, v9
+; RV64-NEXT:    vsll.vi v10, v8, 8
+; RV64-NEXT:    slli a4, a3, 32
+; RV64-NEXT:    vand.vx v10, v10, a4
+; RV64-NEXT:    vsll.vi v11, v8, 24
+; RV64-NEXT:    slli a4, a3, 40
+; RV64-NEXT:    vand.vx v11, v11, a4
+; RV64-NEXT:    vor.vv v10, v11, v10
+; RV64-NEXT:    vsll.vx v11, v8, a1
+; RV64-NEXT:    vsll.vx v8, v8, a2
+; RV64-NEXT:    slli a1, a3, 48
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vor.vv v8, v11, v8
+; RV64-NEXT:    vor.vv v8, v8, v10
+; RV64-NEXT:    vor.vv v8, v8, v9
+; RV64-NEXT:    vse64.v v8, (a0)
+; RV64-NEXT:    ret
   %a = load <2 x i64>, <2 x i64>* %x
   %b = load <2 x i64>, <2 x i64>* %y
   %c = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
index e90ee2480d80e..9daaf921251cf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -1,155 +1,59 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32I
-; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64I
-; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32D
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64D
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32I
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32D
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64D
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8,LMULMAX8-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8,LMULMAX8-RV64
 
 define void @ctlz_v16i8(<16 x i8>* %x, <16 x i8>* %y) nounwind {
-; LMULMAX2-RV32-LABEL: ctlz_v16i8:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX2-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    vxor.vi v8, v8, -1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32-NEXT:    li a1, 85
-; LMULMAX2-RV32-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    li a1, 51
-; LMULMAX2-RV32-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    vand.vi v8, v8, 15
-; LMULMAX2-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: ctlz_v16i8:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX2-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT:    vxor.vi v8, v8, -1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64-NEXT:    li a1, 85
-; LMULMAX2-RV64-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT:    li a1, 51
-; LMULMAX2-RV64-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT:    vand.vi v8, v8, 15
-; LMULMAX2-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: ctlz_v16i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vxor.vi v8, v8, -1
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT:    li a1, 85
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    li a1, 51
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vand.vi v8, v8, 15
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: ctlz_v16i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 2
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vxor.vi v8, v8, -1
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT:    li a1, 85
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    li a1, 51
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vand.vi v8, v8, 15
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: ctlz_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vxor.vi v8, v8, -1
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    li a1, 85
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    li a1, 51
+; CHECK-NEXT:    vand.vx v9, v8, a1
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    vand.vi v8, v8, 15
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
 ;
-; LMULMAX8-RV32-LABEL: ctlz_v16i8:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX8-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; LMULMAX8-RV32-NEXT:    vzext.vf4 v12, v8
-; LMULMAX8-RV32-NEXT:    vfcvt.f.xu.v v12, v12
-; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; LMULMAX8-RV32-NEXT:    vnsrl.wi v10, v12, 23
-; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; LMULMAX8-RV32-NEXT:    vnsrl.wx v9, v10, zero
-; LMULMAX8-RV32-NEXT:    li a1, 134
-; LMULMAX8-RV32-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV32-NEXT:    vrsub.vx v8, v9, a1
-; LMULMAX8-RV32-NEXT:    vmerge.vim v8, v8, 8, v0
-; LMULMAX8-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: ctlz_v16i8:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX8-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; LMULMAX8-RV64-NEXT:    vzext.vf4 v12, v8
-; LMULMAX8-RV64-NEXT:    vfcvt.f.xu.v v12, v12
-; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; LMULMAX8-RV64-NEXT:    vnsrl.wi v10, v12, 23
-; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; LMULMAX8-RV64-NEXT:    vnsrl.wx v9, v10, zero
-; LMULMAX8-RV64-NEXT:    li a1, 134
-; LMULMAX8-RV64-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV64-NEXT:    vrsub.vx v8, v9, a1
-; LMULMAX8-RV64-NEXT:    vmerge.vim v8, v8, 8, v0
-; LMULMAX8-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    ret
+; LMULMAX8-LABEL: ctlz_v16i8:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; LMULMAX8-NEXT:    vle8.v v8, (a0)
+; LMULMAX8-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; LMULMAX8-NEXT:    vzext.vf4 v12, v8
+; LMULMAX8-NEXT:    vfcvt.f.xu.v v12, v12
+; LMULMAX8-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; LMULMAX8-NEXT:    vnsrl.wi v10, v12, 23
+; LMULMAX8-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
+; LMULMAX8-NEXT:    vnsrl.wx v9, v10, zero
+; LMULMAX8-NEXT:    li a1, 134
+; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
+; LMULMAX8-NEXT:    vrsub.vx v8, v9, a1
+; LMULMAX8-NEXT:    vmerge.vim v8, v8, 8, v0
+; LMULMAX8-NEXT:    vse8.v v8, (a0)
+; LMULMAX8-NEXT:    ret
   %a = load <16 x i8>, <16 x i8>* %x
   %b = load <16 x i8>, <16 x i8>* %y
   %c = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
@@ -327,33 +231,19 @@ define void @ctlz_v8i16(<8 x i16>* %x, <8 x i16>* %y) nounwind {
 ; LMULMAX2-RV64D-NEXT:    vse16.v v8, (a0)
 ; LMULMAX2-RV64D-NEXT:    ret
 ;
-; LMULMAX8-RV32-LABEL: ctlz_v8i16:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX8-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX8-RV32-NEXT:    vnsrl.wi v9, v10, 23
-; LMULMAX8-RV32-NEXT:    li a1, 142
-; LMULMAX8-RV32-NEXT:    vrsub.vx v9, v9, a1
-; LMULMAX8-RV32-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV32-NEXT:    li a1, 16
-; LMULMAX8-RV32-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX8-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: ctlz_v8i16:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX8-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX8-RV64-NEXT:    vnsrl.wi v9, v10, 23
-; LMULMAX8-RV64-NEXT:    li a1, 142
-; LMULMAX8-RV64-NEXT:    vrsub.vx v9, v9, a1
-; LMULMAX8-RV64-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV64-NEXT:    li a1, 16
-; LMULMAX8-RV64-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX8-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    ret
+; LMULMAX8-LABEL: ctlz_v8i16:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; LMULMAX8-NEXT:    vle16.v v8, (a0)
+; LMULMAX8-NEXT:    vfwcvt.f.xu.v v10, v8
+; LMULMAX8-NEXT:    vnsrl.wi v9, v10, 23
+; LMULMAX8-NEXT:    li a1, 142
+; LMULMAX8-NEXT:    vrsub.vx v9, v9, a1
+; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
+; LMULMAX8-NEXT:    li a1, 16
+; LMULMAX8-NEXT:    vmerge.vxm v8, v9, a1, v0
+; LMULMAX8-NEXT:    vse16.v v8, (a0)
+; LMULMAX8-NEXT:    ret
   %a = load <8 x i16>, <8 x i16>* %x
   %b = load <8 x i16>, <8 x i16>* %y
   %c = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
@@ -545,35 +435,20 @@ define void @ctlz_v4i32(<4 x i32>* %x, <4 x i32>* %y) nounwind {
 ; LMULMAX2-RV64D-NEXT:    vse32.v v8, (a0)
 ; LMULMAX2-RV64D-NEXT:    ret
 ;
-; LMULMAX8-RV32-LABEL: ctlz_v4i32:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX8-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX8-RV32-NEXT:    li a1, 52
-; LMULMAX8-RV32-NEXT:    vnsrl.wx v9, v10, a1
-; LMULMAX8-RV32-NEXT:    li a1, 1054
-; LMULMAX8-RV32-NEXT:    vrsub.vx v9, v9, a1
-; LMULMAX8-RV32-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV32-NEXT:    li a1, 32
-; LMULMAX8-RV32-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX8-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: ctlz_v4i32:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX8-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    vfwcvt.f.xu.v v10, v8
-; LMULMAX8-RV64-NEXT:    li a1, 52
-; LMULMAX8-RV64-NEXT:    vnsrl.wx v9, v10, a1
-; LMULMAX8-RV64-NEXT:    li a1, 1054
-; LMULMAX8-RV64-NEXT:    vrsub.vx v9, v9, a1
-; LMULMAX8-RV64-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV64-NEXT:    li a1, 32
-; LMULMAX8-RV64-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX8-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    ret
+; LMULMAX8-LABEL: ctlz_v4i32:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; LMULMAX8-NEXT:    vle32.v v8, (a0)
+; LMULMAX8-NEXT:    vfwcvt.f.xu.v v10, v8
+; LMULMAX8-NEXT:    li a1, 52
+; LMULMAX8-NEXT:    vnsrl.wx v9, v10, a1
+; LMULMAX8-NEXT:    li a1, 1054
+; LMULMAX8-NEXT:    vrsub.vx v9, v9, a1
+; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
+; LMULMAX8-NEXT:    li a1, 32
+; LMULMAX8-NEXT:    vmerge.vxm v8, v9, a1, v0
+; LMULMAX8-NEXT:    vse32.v v8, (a0)
+; LMULMAX8-NEXT:    ret
   %a = load <4 x i32>, <4 x i32>* %x
   %b = load <4 x i32>, <4 x i32>* %y
   %c = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
@@ -888,189 +763,97 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) nounwind {
 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
 
 define void @ctlz_v32i8(<32 x i8>* %x, <32 x i8>* %y) nounwind {
-; LMULMAX2-RV32-LABEL: ctlz_v32i8:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    li a1, 32
-; LMULMAX2-RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
-; LMULMAX2-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vxor.vi v8, v8, -1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT:    li a1, 85
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    li a1, 51
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vand.vi v8, v8, 15
-; LMULMAX2-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
+; LMULMAX2-LABEL: ctlz_v32i8:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    li a1, 32
+; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
+; LMULMAX2-NEXT:    vle8.v v8, (a0)
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX2-NEXT:    vor.vv v8, v8, v10
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 2
+; LMULMAX2-NEXT:    vor.vv v8, v8, v10
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX2-NEXT:    vor.vv v8, v8, v10
+; LMULMAX2-NEXT:    vxor.vi v8, v8, -1
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX2-NEXT:    li a1, 85
+; LMULMAX2-NEXT:    vand.vx v10, v10, a1
+; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
+; LMULMAX2-NEXT:    li a1, 51
+; LMULMAX2-NEXT:    vand.vx v10, v8, a1
+; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX2-NEXT:    vand.vx v8, v8, a1
+; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX2-NEXT:    vand.vi v8, v8, 15
+; LMULMAX2-NEXT:    vse8.v v8, (a0)
+; LMULMAX2-NEXT:    ret
 ;
-; LMULMAX2-RV64-LABEL: ctlz_v32i8:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    li a1, 32
-; LMULMAX2-RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
-; LMULMAX2-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vxor.vi v8, v8, -1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT:    li a1, 85
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    li a1, 51
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vand.vi v8, v8, 15
-; LMULMAX2-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
+; LMULMAX1-LABEL: ctlz_v32i8:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vle8.v v8, (a1)
+; LMULMAX1-NEXT:    vle8.v v9, (a0)
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX1-NEXT:    vor.vv v8, v8, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 2
+; LMULMAX1-NEXT:    vor.vv v8, v8, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX1-NEXT:    vor.vv v8, v8, v10
+; LMULMAX1-NEXT:    vxor.vi v8, v8, -1
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX1-NEXT:    li a2, 85
+; LMULMAX1-NEXT:    vand.vx v10, v10, a2
+; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
+; LMULMAX1-NEXT:    li a3, 51
+; LMULMAX1-NEXT:    vand.vx v10, v8, a3
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT:    vand.vx v8, v8, a3
+; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX1-NEXT:    vand.vi v8, v8, 15
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
+; LMULMAX1-NEXT:    vor.vv v9, v9, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 2
+; LMULMAX1-NEXT:    vor.vv v9, v9, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
+; LMULMAX1-NEXT:    vor.vv v9, v9, v10
+; LMULMAX1-NEXT:    vxor.vi v9, v9, -1
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
+; LMULMAX1-NEXT:    vand.vx v10, v10, a2
+; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
+; LMULMAX1-NEXT:    vand.vx v10, v9, a3
+; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
+; LMULMAX1-NEXT:    vand.vx v9, v9, a3
+; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
+; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
+; LMULMAX1-NEXT:    vand.vi v9, v9, 15
+; LMULMAX1-NEXT:    vse8.v v9, (a0)
+; LMULMAX1-NEXT:    vse8.v v8, (a1)
+; LMULMAX1-NEXT:    ret
 ;
-; LMULMAX1-RV32-LABEL: ctlz_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vxor.vi v8, v8, -1
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT:    li a2, 85
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    li a3, 51
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v8, a3
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vand.vi v8, v8, 15
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 2
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vxor.vi v9, v9, -1
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v9, a3
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vi v9, v9, 15
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: ctlz_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vxor.vi v8, v8, -1
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT:    li a2, 85
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    li a3, 51
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v8, a3
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a3
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vand.vi v8, v8, 15
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 2
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vxor.vi v9, v9, -1
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v9, a3
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a3
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vand.vi v9, v9, 15
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    ret
-;
-; LMULMAX8-RV32-LABEL: ctlz_v32i8:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    li a1, 32
-; LMULMAX8-RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
-; LMULMAX8-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; LMULMAX8-RV32-NEXT:    vzext.vf4 v16, v8
-; LMULMAX8-RV32-NEXT:    vfcvt.f.xu.v v16, v16
-; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; LMULMAX8-RV32-NEXT:    vnsrl.wi v12, v16, 23
-; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; LMULMAX8-RV32-NEXT:    vnsrl.wx v10, v12, zero
-; LMULMAX8-RV32-NEXT:    li a1, 134
-; LMULMAX8-RV32-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV32-NEXT:    vrsub.vx v8, v10, a1
-; LMULMAX8-RV32-NEXT:    vmerge.vim v8, v8, 8, v0
-; LMULMAX8-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: ctlz_v32i8:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    li a1, 32
-; LMULMAX8-RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
-; LMULMAX8-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; LMULMAX8-RV64-NEXT:    vzext.vf4 v16, v8
-; LMULMAX8-RV64-NEXT:    vfcvt.f.xu.v v16, v16
-; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; LMULMAX8-RV64-NEXT:    vnsrl.wi v12, v16, 23
-; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; LMULMAX8-RV64-NEXT:    vnsrl.wx v10, v12, zero
-; LMULMAX8-RV64-NEXT:    li a1, 134
-; LMULMAX8-RV64-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV64-NEXT:    vrsub.vx v8, v10, a1
-; LMULMAX8-RV64-NEXT:    vmerge.vim v8, v8, 8, v0
-; LMULMAX8-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    ret
+; LMULMAX8-LABEL: ctlz_v32i8:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    li a1, 32
+; LMULMAX8-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
+; LMULMAX8-NEXT:    vle8.v v8, (a0)
+; LMULMAX8-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
+; LMULMAX8-NEXT:    vzext.vf4 v16, v8
+; LMULMAX8-NEXT:    vfcvt.f.xu.v v16, v16
+; LMULMAX8-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
+; LMULMAX8-NEXT:    vnsrl.wi v12, v16, 23
+; LMULMAX8-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
+; LMULMAX8-NEXT:    vnsrl.wx v10, v12, zero
+; LMULMAX8-NEXT:    li a1, 134
+; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
+; LMULMAX8-NEXT:    vrsub.vx v8, v10, a1
+; LMULMAX8-NEXT:    vmerge.vim v8, v8, 8, v0
+; LMULMAX8-NEXT:    vse8.v v8, (a0)
+; LMULMAX8-NEXT:    ret
   %a = load <32 x i8>, <32 x i8>* %x
   %b = load <32 x i8>, <32 x i8>* %y
   %c = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false)
@@ -1268,33 +1051,19 @@ define void @ctlz_v16i16(<16 x i16>* %x, <16 x i16>* %y) nounwind {
 ; LMULMAX1-RV64-NEXT:    vse16.v v8, (a1)
 ; LMULMAX1-RV64-NEXT:    ret
 ;
-; LMULMAX8-RV32-LABEL: ctlz_v16i16:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; LMULMAX8-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    vfwcvt.f.xu.v v12, v8
-; LMULMAX8-RV32-NEXT:    vnsrl.wi v10, v12, 23
-; LMULMAX8-RV32-NEXT:    li a1, 142
-; LMULMAX8-RV32-NEXT:    vrsub.vx v10, v10, a1
-; LMULMAX8-RV32-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV32-NEXT:    li a1, 16
-; LMULMAX8-RV32-NEXT:    vmerge.vxm v8, v10, a1, v0
-; LMULMAX8-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: ctlz_v16i16:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; LMULMAX8-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    vfwcvt.f.xu.v v12, v8
-; LMULMAX8-RV64-NEXT:    vnsrl.wi v10, v12, 23
-; LMULMAX8-RV64-NEXT:    li a1, 142
-; LMULMAX8-RV64-NEXT:    vrsub.vx v10, v10, a1
-; LMULMAX8-RV64-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV64-NEXT:    li a1, 16
-; LMULMAX8-RV64-NEXT:    vmerge.vxm v8, v10, a1, v0
-; LMULMAX8-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    ret
+; LMULMAX8-LABEL: ctlz_v16i16:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
+; LMULMAX8-NEXT:    vle16.v v8, (a0)
+; LMULMAX8-NEXT:    vfwcvt.f.xu.v v12, v8
+; LMULMAX8-NEXT:    vnsrl.wi v10, v12, 23
+; LMULMAX8-NEXT:    li a1, 142
+; LMULMAX8-NEXT:    vrsub.vx v10, v10, a1
+; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
+; LMULMAX8-NEXT:    li a1, 16
+; LMULMAX8-NEXT:    vmerge.vxm v8, v10, a1, v0
+; LMULMAX8-NEXT:    vse16.v v8, (a0)
+; LMULMAX8-NEXT:    ret
   %a = load <16 x i16>, <16 x i16>* %x
   %b = load <16 x i16>, <16 x i16>* %y
   %c = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false)
@@ -1508,35 +1277,20 @@ define void @ctlz_v8i32(<8 x i32>* %x, <8 x i32>* %y) nounwind {
 ; LMULMAX1-RV64-NEXT:    vse32.v v8, (a1)
 ; LMULMAX1-RV64-NEXT:    ret
 ;
-; LMULMAX8-RV32-LABEL: ctlz_v8i32:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX8-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    vfwcvt.f.xu.v v12, v8
-; LMULMAX8-RV32-NEXT:    li a1, 52
-; LMULMAX8-RV32-NEXT:    vnsrl.wx v10, v12, a1
-; LMULMAX8-RV32-NEXT:    li a1, 1054
-; LMULMAX8-RV32-NEXT:    vrsub.vx v10, v10, a1
-; LMULMAX8-RV32-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV32-NEXT:    li a1, 32
-; LMULMAX8-RV32-NEXT:    vmerge.vxm v8, v10, a1, v0
-; LMULMAX8-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: ctlz_v8i32:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX8-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    vfwcvt.f.xu.v v12, v8
-; LMULMAX8-RV64-NEXT:    li a1, 52
-; LMULMAX8-RV64-NEXT:    vnsrl.wx v10, v12, a1
-; LMULMAX8-RV64-NEXT:    li a1, 1054
-; LMULMAX8-RV64-NEXT:    vrsub.vx v10, v10, a1
-; LMULMAX8-RV64-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV64-NEXT:    li a1, 32
-; LMULMAX8-RV64-NEXT:    vmerge.vxm v8, v10, a1, v0
-; LMULMAX8-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    ret
+; LMULMAX8-LABEL: ctlz_v8i32:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
+; LMULMAX8-NEXT:    vle32.v v8, (a0)
+; LMULMAX8-NEXT:    vfwcvt.f.xu.v v12, v8
+; LMULMAX8-NEXT:    li a1, 52
+; LMULMAX8-NEXT:    vnsrl.wx v10, v12, a1
+; LMULMAX8-NEXT:    li a1, 1054
+; LMULMAX8-NEXT:    vrsub.vx v10, v10, a1
+; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
+; LMULMAX8-NEXT:    li a1, 32
+; LMULMAX8-NEXT:    vmerge.vxm v8, v10, a1, v0
+; LMULMAX8-NEXT:    vse32.v v8, (a0)
+; LMULMAX8-NEXT:    ret
   %a = load <8 x i32>, <8 x i32>* %x
   %b = load <8 x i32>, <8 x i32>* %y
   %c = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
index 9acfb3cb7ef81..b4c51f1da83b3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -1,147 +1,58 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32I
-; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64I
-; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32D
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64D
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32I
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32D
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64D
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8,LMULMAX8-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8,LMULMAX8-RV64
 
 define void @cttz_v16i8(<16 x i8>* %x, <16 x i8>* %y) nounwind {
-; LMULMAX2-RV32-LABEL: cttz_v16i8:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX2-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    li a1, 1
-; LMULMAX2-RV32-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX2-RV32-NEXT:    vxor.vi v8, v8, -1
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV32-NEXT:    li a1, 85
-; LMULMAX2-RV32-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    li a1, 51
-; LMULMAX2-RV32-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT:    vand.vi v8, v8, 15
-; LMULMAX2-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: cttz_v16i8:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX2-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    li a1, 1
-; LMULMAX2-RV64-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX2-RV64-NEXT:    vxor.vi v8, v8, -1
-; LMULMAX2-RV64-NEXT:    vand.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX2-RV64-NEXT:    li a1, 85
-; LMULMAX2-RV64-NEXT:    vand.vx v9, v9, a1
-; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT:    li a1, 51
-; LMULMAX2-RV64-NEXT:    vand.vx v9, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX2-RV64-NEXT:    vand.vi v8, v8, 15
-; LMULMAX2-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: cttz_v16i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    li a1, 1
-; LMULMAX1-RV32-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT:    vxor.vi v8, v8, -1
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV32-NEXT:    li a1, 85
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    li a1, 51
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vand.vi v8, v8, 15
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: cttz_v16i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    li a1, 1
-; LMULMAX1-RV64-NEXT:    vsub.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT:    vxor.vi v8, v8, -1
-; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 1
-; LMULMAX1-RV64-NEXT:    li a1, 85
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    li a1, 51
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v8, a1
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v8, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vand.vi v8, v8, 15
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
-;
-; LMULMAX8-RV32-LABEL: cttz_v16i8:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX8-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX8-RV32-NEXT:    vand.vv v9, v8, v9
-; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; LMULMAX8-RV32-NEXT:    vzext.vf4 v12, v9
-; LMULMAX8-RV32-NEXT:    vfcvt.f.xu.v v12, v12
-; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; LMULMAX8-RV32-NEXT:    vnsrl.wi v10, v12, 23
-; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; LMULMAX8-RV32-NEXT:    vnsrl.wx v9, v10, zero
-; LMULMAX8-RV32-NEXT:    li a1, 127
-; LMULMAX8-RV32-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV32-NEXT:    vsub.vx v8, v9, a1
-; LMULMAX8-RV32-NEXT:    vmerge.vim v8, v8, 8, v0
-; LMULMAX8-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    ret
+; CHECK-LABEL: cttz_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    vsub.vx v9, v8, a1
+; CHECK-NEXT:    vxor.vi v8, v8, -1
+; CHECK-NEXT:    vand.vv v8, v8, v9
+; CHECK-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-NEXT:    li a1, 85
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    li a1, 51
+; CHECK-NEXT:    vand.vx v9, v8, a1
+; CHECK-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vadd.vv v8, v9, v8
+; CHECK-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    vand.vi v8, v8, 15
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
 ;
-; LMULMAX8-RV64-LABEL: cttz_v16i8:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX8-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX8-RV64-NEXT:    vand.vv v9, v8, v9
-; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; LMULMAX8-RV64-NEXT:    vzext.vf4 v12, v9
-; LMULMAX8-RV64-NEXT:    vfcvt.f.xu.v v12, v12
-; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; LMULMAX8-RV64-NEXT:    vnsrl.wi v10, v12, 23
-; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; LMULMAX8-RV64-NEXT:    vnsrl.wx v9, v10, zero
-; LMULMAX8-RV64-NEXT:    li a1, 127
-; LMULMAX8-RV64-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV64-NEXT:    vsub.vx v8, v9, a1
-; LMULMAX8-RV64-NEXT:    vmerge.vim v8, v8, 8, v0
-; LMULMAX8-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    ret
+; LMULMAX8-LABEL: cttz_v16i8:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; LMULMAX8-NEXT:    vle8.v v8, (a0)
+; LMULMAX8-NEXT:    vrsub.vi v9, v8, 0
+; LMULMAX8-NEXT:    vand.vv v9, v8, v9
+; LMULMAX8-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; LMULMAX8-NEXT:    vzext.vf4 v12, v9
+; LMULMAX8-NEXT:    vfcvt.f.xu.v v12, v12
+; LMULMAX8-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; LMULMAX8-NEXT:    vnsrl.wi v10, v12, 23
+; LMULMAX8-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
+; LMULMAX8-NEXT:    vnsrl.wx v9, v10, zero
+; LMULMAX8-NEXT:    li a1, 127
+; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
+; LMULMAX8-NEXT:    vsub.vx v8, v9, a1
+; LMULMAX8-NEXT:    vmerge.vim v8, v8, 8, v0
+; LMULMAX8-NEXT:    vse8.v v8, (a0)
+; LMULMAX8-NEXT:    ret
   %a = load <16 x i8>, <16 x i8>* %x
   %b = load <16 x i8>, <16 x i8>* %y
   %c = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
@@ -303,37 +214,21 @@ define void @cttz_v8i16(<8 x i16>* %x, <8 x i16>* %y) nounwind {
 ; LMULMAX2-RV64D-NEXT:    vse16.v v8, (a0)
 ; LMULMAX2-RV64D-NEXT:    ret
 ;
-; LMULMAX8-RV32-LABEL: cttz_v8i16:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX8-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX8-RV32-NEXT:    vand.vv v9, v8, v9
-; LMULMAX8-RV32-NEXT:    vfwcvt.f.xu.v v10, v9
-; LMULMAX8-RV32-NEXT:    vnsrl.wi v9, v10, 23
-; LMULMAX8-RV32-NEXT:    li a1, 127
-; LMULMAX8-RV32-NEXT:    vsub.vx v9, v9, a1
-; LMULMAX8-RV32-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV32-NEXT:    li a1, 16
-; LMULMAX8-RV32-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX8-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: cttz_v8i16:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX8-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX8-RV64-NEXT:    vand.vv v9, v8, v9
-; LMULMAX8-RV64-NEXT:    vfwcvt.f.xu.v v10, v9
-; LMULMAX8-RV64-NEXT:    vnsrl.wi v9, v10, 23
-; LMULMAX8-RV64-NEXT:    li a1, 127
-; LMULMAX8-RV64-NEXT:    vsub.vx v9, v9, a1
-; LMULMAX8-RV64-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV64-NEXT:    li a1, 16
-; LMULMAX8-RV64-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX8-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    ret
+; LMULMAX8-LABEL: cttz_v8i16:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; LMULMAX8-NEXT:    vle16.v v8, (a0)
+; LMULMAX8-NEXT:    vrsub.vi v9, v8, 0
+; LMULMAX8-NEXT:    vand.vv v9, v8, v9
+; LMULMAX8-NEXT:    vfwcvt.f.xu.v v10, v9
+; LMULMAX8-NEXT:    vnsrl.wi v9, v10, 23
+; LMULMAX8-NEXT:    li a1, 127
+; LMULMAX8-NEXT:    vsub.vx v9, v9, a1
+; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
+; LMULMAX8-NEXT:    li a1, 16
+; LMULMAX8-NEXT:    vmerge.vxm v8, v9, a1, v0
+; LMULMAX8-NEXT:    vse16.v v8, (a0)
+; LMULMAX8-NEXT:    ret
   %a = load <8 x i16>, <8 x i16>* %x
   %b = load <8 x i16>, <8 x i16>* %y
   %c = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
@@ -501,39 +396,22 @@ define void @cttz_v4i32(<4 x i32>* %x, <4 x i32>* %y) nounwind {
 ; LMULMAX2-RV64D-NEXT:    vse32.v v8, (a0)
 ; LMULMAX2-RV64D-NEXT:    ret
 ;
-; LMULMAX8-RV32-LABEL: cttz_v4i32:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX8-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX8-RV32-NEXT:    vand.vv v9, v8, v9
-; LMULMAX8-RV32-NEXT:    vfwcvt.f.xu.v v10, v9
-; LMULMAX8-RV32-NEXT:    li a1, 52
-; LMULMAX8-RV32-NEXT:    vnsrl.wx v9, v10, a1
-; LMULMAX8-RV32-NEXT:    li a1, 1023
-; LMULMAX8-RV32-NEXT:    vsub.vx v9, v9, a1
-; LMULMAX8-RV32-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV32-NEXT:    li a1, 32
-; LMULMAX8-RV32-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX8-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: cttz_v4i32:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX8-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX8-RV64-NEXT:    vand.vv v9, v8, v9
-; LMULMAX8-RV64-NEXT:    vfwcvt.f.xu.v v10, v9
-; LMULMAX8-RV64-NEXT:    li a1, 52
-; LMULMAX8-RV64-NEXT:    vnsrl.wx v9, v10, a1
-; LMULMAX8-RV64-NEXT:    li a1, 1023
-; LMULMAX8-RV64-NEXT:    vsub.vx v9, v9, a1
-; LMULMAX8-RV64-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV64-NEXT:    li a1, 32
-; LMULMAX8-RV64-NEXT:    vmerge.vxm v8, v9, a1, v0
-; LMULMAX8-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    ret
+; LMULMAX8-LABEL: cttz_v4i32:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; LMULMAX8-NEXT:    vle32.v v8, (a0)
+; LMULMAX8-NEXT:    vrsub.vi v9, v8, 0
+; LMULMAX8-NEXT:    vand.vv v9, v8, v9
+; LMULMAX8-NEXT:    vfwcvt.f.xu.v v10, v9
+; LMULMAX8-NEXT:    li a1, 52
+; LMULMAX8-NEXT:    vnsrl.wx v9, v10, a1
+; LMULMAX8-NEXT:    li a1, 1023
+; LMULMAX8-NEXT:    vsub.vx v9, v9, a1
+; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
+; LMULMAX8-NEXT:    li a1, 32
+; LMULMAX8-NEXT:    vmerge.vxm v8, v9, a1, v0
+; LMULMAX8-NEXT:    vse32.v v8, (a0)
+; LMULMAX8-NEXT:    ret
   %a = load <4 x i32>, <4 x i32>* %x
   %b = load <4 x i32>, <4 x i32>* %y
   %c = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
@@ -788,173 +666,89 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) nounwind {
 declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
 
 define void @cttz_v32i8(<32 x i8>* %x, <32 x i8>* %y) nounwind {
-; LMULMAX2-RV32-LABEL: cttz_v32i8:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    li a1, 32
-; LMULMAX2-RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
-; LMULMAX2-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    li a1, 1
-; LMULMAX2-RV32-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT:    vxor.vi v8, v8, -1
-; LMULMAX2-RV32-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV32-NEXT:    li a1, 85
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    li a1, 51
-; LMULMAX2-RV32-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vand.vi v8, v8, 15
-; LMULMAX2-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
+; LMULMAX2-LABEL: cttz_v32i8:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    li a1, 32
+; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
+; LMULMAX2-NEXT:    vle8.v v8, (a0)
+; LMULMAX2-NEXT:    li a1, 1
+; LMULMAX2-NEXT:    vsub.vx v10, v8, a1
+; LMULMAX2-NEXT:    vxor.vi v8, v8, -1
+; LMULMAX2-NEXT:    vand.vv v8, v8, v10
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX2-NEXT:    li a1, 85
+; LMULMAX2-NEXT:    vand.vx v10, v10, a1
+; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
+; LMULMAX2-NEXT:    li a1, 51
+; LMULMAX2-NEXT:    vand.vx v10, v8, a1
+; LMULMAX2-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX2-NEXT:    vand.vx v8, v8, a1
+; LMULMAX2-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX2-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX2-NEXT:    vand.vi v8, v8, 15
+; LMULMAX2-NEXT:    vse8.v v8, (a0)
+; LMULMAX2-NEXT:    ret
 ;
-; LMULMAX2-RV64-LABEL: cttz_v32i8:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    li a1, 32
-; LMULMAX2-RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
-; LMULMAX2-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    li a1, 1
-; LMULMAX2-RV64-NEXT:    vsub.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT:    vxor.vi v8, v8, -1
-; LMULMAX2-RV64-NEXT:    vand.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX2-RV64-NEXT:    li a1, 85
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    li a1, 51
-; LMULMAX2-RV64-NEXT:    vand.vx v10, v8, a1
-; LMULMAX2-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX2-RV64-NEXT:    vand.vx v8, v8, a1
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vand.vi v8, v8, 15
-; LMULMAX2-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
+; LMULMAX1-LABEL: cttz_v32i8:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vle8.v v8, (a1)
+; LMULMAX1-NEXT:    vle8.v v9, (a0)
+; LMULMAX1-NEXT:    li a2, 1
+; LMULMAX1-NEXT:    vsub.vx v10, v8, a2
+; LMULMAX1-NEXT:    vxor.vi v8, v8, -1
+; LMULMAX1-NEXT:    vand.vv v8, v8, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 1
+; LMULMAX1-NEXT:    li a3, 85
+; LMULMAX1-NEXT:    vand.vx v10, v10, a3
+; LMULMAX1-NEXT:    vsub.vv v8, v8, v10
+; LMULMAX1-NEXT:    li a4, 51
+; LMULMAX1-NEXT:    vand.vx v10, v8, a4
+; LMULMAX1-NEXT:    vsrl.vi v8, v8, 2
+; LMULMAX1-NEXT:    vand.vx v8, v8, a4
+; LMULMAX1-NEXT:    vadd.vv v8, v10, v8
+; LMULMAX1-NEXT:    vsrl.vi v10, v8, 4
+; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX1-NEXT:    vand.vi v8, v8, 15
+; LMULMAX1-NEXT:    vsub.vx v10, v9, a2
+; LMULMAX1-NEXT:    vxor.vi v9, v9, -1
+; LMULMAX1-NEXT:    vand.vv v9, v9, v10
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 1
+; LMULMAX1-NEXT:    vand.vx v10, v10, a3
+; LMULMAX1-NEXT:    vsub.vv v9, v9, v10
+; LMULMAX1-NEXT:    vand.vx v10, v9, a4
+; LMULMAX1-NEXT:    vsrl.vi v9, v9, 2
+; LMULMAX1-NEXT:    vand.vx v9, v9, a4
+; LMULMAX1-NEXT:    vadd.vv v9, v10, v9
+; LMULMAX1-NEXT:    vsrl.vi v10, v9, 4
+; LMULMAX1-NEXT:    vadd.vv v9, v9, v10
+; LMULMAX1-NEXT:    vand.vi v9, v9, 15
+; LMULMAX1-NEXT:    vse8.v v9, (a0)
+; LMULMAX1-NEXT:    vse8.v v8, (a1)
+; LMULMAX1-NEXT:    ret
 ;
-; LMULMAX1-RV32-LABEL: cttz_v32i8:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle8.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle8.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    li a2, 1
-; LMULMAX1-RV32-NEXT:    vsub.vx v10, v8, a2
-; LMULMAX1-RV32-NEXT:    vxor.vi v8, v8, -1
-; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT:    li a3, 85
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    li a4, 51
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v8, a4
-; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT:    vand.vi v8, v8, 15
-; LMULMAX1-RV32-NEXT:    vsub.vx v10, v9, a2
-; LMULMAX1-RV32-NEXT:    vxor.vi v9, v9, -1
-; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v9, a4
-; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vand.vi v9, v9, 15
-; LMULMAX1-RV32-NEXT:    vse8.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vse8.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: cttz_v32i8:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle8.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    vle8.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    li a2, 1
-; LMULMAX1-RV64-NEXT:    vsub.vx v10, v8, a2
-; LMULMAX1-RV64-NEXT:    vxor.vi v8, v8, -1
-; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT:    li a3, 85
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    li a4, 51
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v8, a4
-; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v10, v8
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vand.vi v8, v8, 15
-; LMULMAX1-RV64-NEXT:    vsub.vx v10, v9, a2
-; LMULMAX1-RV64-NEXT:    vxor.vi v9, v9, -1
-; LMULMAX1-RV64-NEXT:    vand.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT:    vsub.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v9, a4
-; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v9, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vand.vi v9, v9, 15
-; LMULMAX1-RV64-NEXT:    vse8.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse8.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    ret
-;
-; LMULMAX8-RV32-LABEL: cttz_v32i8:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    li a1, 32
-; LMULMAX8-RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
-; LMULMAX8-RV32-NEXT:    vle8.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX8-RV32-NEXT:    vand.vv v10, v8, v10
-; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; LMULMAX8-RV32-NEXT:    vzext.vf4 v16, v10
-; LMULMAX8-RV32-NEXT:    vfcvt.f.xu.v v16, v16
-; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; LMULMAX8-RV32-NEXT:    vnsrl.wi v12, v16, 23
-; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; LMULMAX8-RV32-NEXT:    vnsrl.wx v10, v12, zero
-; LMULMAX8-RV32-NEXT:    li a1, 127
-; LMULMAX8-RV32-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV32-NEXT:    vsub.vx v8, v10, a1
-; LMULMAX8-RV32-NEXT:    vmerge.vim v8, v8, 8, v0
-; LMULMAX8-RV32-NEXT:    vse8.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: cttz_v32i8:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    li a1, 32
-; LMULMAX8-RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
-; LMULMAX8-RV64-NEXT:    vle8.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX8-RV64-NEXT:    vand.vv v10, v8, v10
-; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; LMULMAX8-RV64-NEXT:    vzext.vf4 v16, v10
-; LMULMAX8-RV64-NEXT:    vfcvt.f.xu.v v16, v16
-; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; LMULMAX8-RV64-NEXT:    vnsrl.wi v12, v16, 23
-; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; LMULMAX8-RV64-NEXT:    vnsrl.wx v10, v12, zero
-; LMULMAX8-RV64-NEXT:    li a1, 127
-; LMULMAX8-RV64-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV64-NEXT:    vsub.vx v8, v10, a1
-; LMULMAX8-RV64-NEXT:    vmerge.vim v8, v8, 8, v0
-; LMULMAX8-RV64-NEXT:    vse8.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    ret
+; LMULMAX8-LABEL: cttz_v32i8:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    li a1, 32
+; LMULMAX8-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
+; LMULMAX8-NEXT:    vle8.v v8, (a0)
+; LMULMAX8-NEXT:    vrsub.vi v10, v8, 0
+; LMULMAX8-NEXT:    vand.vv v10, v8, v10
+; LMULMAX8-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
+; LMULMAX8-NEXT:    vzext.vf4 v16, v10
+; LMULMAX8-NEXT:    vfcvt.f.xu.v v16, v16
+; LMULMAX8-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
+; LMULMAX8-NEXT:    vnsrl.wi v12, v16, 23
+; LMULMAX8-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
+; LMULMAX8-NEXT:    vnsrl.wx v10, v12, zero
+; LMULMAX8-NEXT:    li a1, 127
+; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
+; LMULMAX8-NEXT:    vsub.vx v8, v10, a1
+; LMULMAX8-NEXT:    vmerge.vim v8, v8, 8, v0
+; LMULMAX8-NEXT:    vse8.v v8, (a0)
+; LMULMAX8-NEXT:    ret
   %a = load <32 x i8>, <32 x i8>* %x
   %b = load <32 x i8>, <32 x i8>* %y
   %c = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
@@ -1120,37 +914,21 @@ define void @cttz_v16i16(<16 x i16>* %x, <16 x i16>* %y) nounwind {
 ; LMULMAX1-RV64-NEXT:    vse16.v v8, (a1)
 ; LMULMAX1-RV64-NEXT:    ret
 ;
-; LMULMAX8-RV32-LABEL: cttz_v16i16:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; LMULMAX8-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX8-RV32-NEXT:    vand.vv v10, v8, v10
-; LMULMAX8-RV32-NEXT:    vfwcvt.f.xu.v v12, v10
-; LMULMAX8-RV32-NEXT:    vnsrl.wi v10, v12, 23
-; LMULMAX8-RV32-NEXT:    li a1, 127
-; LMULMAX8-RV32-NEXT:    vsub.vx v10, v10, a1
-; LMULMAX8-RV32-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV32-NEXT:    li a1, 16
-; LMULMAX8-RV32-NEXT:    vmerge.vxm v8, v10, a1, v0
-; LMULMAX8-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: cttz_v16i16:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; LMULMAX8-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX8-RV64-NEXT:    vand.vv v10, v8, v10
-; LMULMAX8-RV64-NEXT:    vfwcvt.f.xu.v v12, v10
-; LMULMAX8-RV64-NEXT:    vnsrl.wi v10, v12, 23
-; LMULMAX8-RV64-NEXT:    li a1, 127
-; LMULMAX8-RV64-NEXT:    vsub.vx v10, v10, a1
-; LMULMAX8-RV64-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV64-NEXT:    li a1, 16
-; LMULMAX8-RV64-NEXT:    vmerge.vxm v8, v10, a1, v0
-; LMULMAX8-RV64-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    ret
+; LMULMAX8-LABEL: cttz_v16i16:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
+; LMULMAX8-NEXT:    vle16.v v8, (a0)
+; LMULMAX8-NEXT:    vrsub.vi v10, v8, 0
+; LMULMAX8-NEXT:    vand.vv v10, v8, v10
+; LMULMAX8-NEXT:    vfwcvt.f.xu.v v12, v10
+; LMULMAX8-NEXT:    vnsrl.wi v10, v12, 23
+; LMULMAX8-NEXT:    li a1, 127
+; LMULMAX8-NEXT:    vsub.vx v10, v10, a1
+; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
+; LMULMAX8-NEXT:    li a1, 16
+; LMULMAX8-NEXT:    vmerge.vxm v8, v10, a1, v0
+; LMULMAX8-NEXT:    vse16.v v8, (a0)
+; LMULMAX8-NEXT:    ret
   %a = load <16 x i16>, <16 x i16>* %x
   %b = load <16 x i16>, <16 x i16>* %y
   %c = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
@@ -1320,39 +1098,22 @@ define void @cttz_v8i32(<8 x i32>* %x, <8 x i32>* %y) nounwind {
 ; LMULMAX1-RV64-NEXT:    vse32.v v8, (a1)
 ; LMULMAX1-RV64-NEXT:    ret
 ;
-; LMULMAX8-RV32-LABEL: cttz_v8i32:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX8-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX8-RV32-NEXT:    vand.vv v10, v8, v10
-; LMULMAX8-RV32-NEXT:    vfwcvt.f.xu.v v12, v10
-; LMULMAX8-RV32-NEXT:    li a1, 52
-; LMULMAX8-RV32-NEXT:    vnsrl.wx v10, v12, a1
-; LMULMAX8-RV32-NEXT:    li a1, 1023
-; LMULMAX8-RV32-NEXT:    vsub.vx v10, v10, a1
-; LMULMAX8-RV32-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV32-NEXT:    li a1, 32
-; LMULMAX8-RV32-NEXT:    vmerge.vxm v8, v10, a1, v0
-; LMULMAX8-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: cttz_v8i32:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX8-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    vrsub.vi v10, v8, 0
-; LMULMAX8-RV64-NEXT:    vand.vv v10, v8, v10
-; LMULMAX8-RV64-NEXT:    vfwcvt.f.xu.v v12, v10
-; LMULMAX8-RV64-NEXT:    li a1, 52
-; LMULMAX8-RV64-NEXT:    vnsrl.wx v10, v12, a1
-; LMULMAX8-RV64-NEXT:    li a1, 1023
-; LMULMAX8-RV64-NEXT:    vsub.vx v10, v10, a1
-; LMULMAX8-RV64-NEXT:    vmseq.vi v0, v8, 0
-; LMULMAX8-RV64-NEXT:    li a1, 32
-; LMULMAX8-RV64-NEXT:    vmerge.vxm v8, v10, a1, v0
-; LMULMAX8-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-RV64-NEXT:    ret
+; LMULMAX8-LABEL: cttz_v8i32:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
+; LMULMAX8-NEXT:    vle32.v v8, (a0)
+; LMULMAX8-NEXT:    vrsub.vi v10, v8, 0
+; LMULMAX8-NEXT:    vand.vv v10, v8, v10
+; LMULMAX8-NEXT:    vfwcvt.f.xu.v v12, v10
+; LMULMAX8-NEXT:    li a1, 52
+; LMULMAX8-NEXT:    vnsrl.wx v10, v12, a1
+; LMULMAX8-NEXT:    li a1, 1023
+; LMULMAX8-NEXT:    vsub.vx v10, v10, a1
+; LMULMAX8-NEXT:    vmseq.vi v0, v8, 0
+; LMULMAX8-NEXT:    li a1, 32
+; LMULMAX8-NEXT:    vmerge.vxm v8, v10, a1, v0
+; LMULMAX8-NEXT:    vse32.v v8, (a0)
+; LMULMAX8-NEXT:    ret
   %a = load <8 x i32>, <8 x i32>* %x
   %b = load <8 x i32>, <8 x i32>* %y
   %c = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
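
For context when reading the checks above: the RV32 and RV64 prefixes
collapse because the two targets emit identical code for these cases. The
zve64x configurations (LMULMAX1/LMULMAX2) lower cttz with the classic
bit-manipulation popcount, while the +d configurations at LMULMAX8 isolate
the lowest set bit and read off the float exponent. Below is a minimal C
sketch of both computations; the helper names are illustrative only and
are not part of the patch.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Bit-manipulation lowering: set every bit below the lowest set bit,
       then popcount with the 0x55/0x33/0x0F masks -- the "li a1, 85",
       "li a1, 51" and "vand.vi v8, v8, 15" constants in the checks.
       cttz(0) comes out as 8 for an i8 lane with no extra compare. */
    static uint8_t cttz8_bitmanip(uint8_t x) {
        uint8_t v = (uint8_t)(~x & (x - 1u)); /* vsub.vx/vxor.vi/vand.vv */
        v = v - ((v >> 1) & 0x55u);           /* counts per bit pair   */
        v = (v & 0x33u) + ((v >> 2) & 0x33u); /* counts per nibble     */
        v = (v + (v >> 4)) & 0x0Fu;           /* total for the byte    */
        return v;
    }

    /* Float-exponent lowering: x & -x is a power of two, so converting
       it to double and extracting the biased exponent gives the trailing
       zero count -- the vfwcvt, "vnsrl.wx ... 52" (mantissa width) and
       "li a1, 1023" (bias) in the checks. The zero case is handled
       separately, as the vmseq.vi/vmerge.vxm pair does in the vector
       code. */
    static uint32_t cttz32_float(uint32_t x) {
        if (x == 0)
            return 32;                        /* vmseq.vi / vmerge.vxm */
        double f = (double)(x & -x);          /* vrsub.vi/vand.vv/vfwcvt */
        uint64_t bits;
        memcpy(&bits, &f, sizeof bits);
        return (uint32_t)((bits >> 52) - 1023);
    }

    int main(void) {
        /* 0x28 = 0b101000, so both lowerings should print 3. */
        printf("%u %u\n", (unsigned)cttz8_bitmanip(0x28),
               (unsigned)cttz32_float(0x28));
        return 0;
    }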