[llvm] 9d2350f - [RISCV][NFC] Reorganize check prefixes in some tests to reduce redundant lines

Ben Shi via llvm-commits <llvm-commits at lists.llvm.org>
Wed Apr 13 03:01:20 PDT 2022


Author: Ping Deng
Date: 2022-04-13T10:01:07Z
New Revision: 9d2350fd19f5a36fabd9b2c225af582b131ffce6

URL: https://github.com/llvm/llvm-project/commit/9d2350fd19f5a36fabd9b2c225af582b131ffce6
DIFF: https://github.com/llvm/llvm-project/commit/9d2350fd19f5a36fabd9b2c225af582b131ffce6.diff

LOG: [RISCV][NFC] Reorganize check prefixes in some tests to reduce redundant lines

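The RUN lines in these tests previously gave every (triple, attribute)
configuration its own check prefix (e.g. RV32I/RV64I and RV32D/RV64D),
even where the riscv32 and riscv64 output is byte-identical, so
update_llc_test_checks.py had to emit the same check block twice.
Adding a shared per-attribute prefix (CHECK-ZVE64X, CHECK-D) lets the
identical blocks collapse into one. A minimal sketch of the idea, using
the ctlz-sdnode.ll RUN lines from the diff below:

  ; Before: RV32I and RV64I each get their own, identical check block.
  ; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32I
  ; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64I

  ; After: both RUN lines also share CHECK-ZVE64X, so the duplicated
  ; bodies are emitted once under the common prefix.
  ; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I
  ; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I
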
Reviewed By: benshi001, craig.topper, frasercrmck

Differential Revision: https://reviews.llvm.org/D123176

Added: 
    

Modified: 
    llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
    llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
    llvm/test/CodeGen/RISCV/ssub_sat.ll
    llvm/test/CodeGen/RISCV/ssub_sat_plus.ll
    llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
index ae0b685239bbc..b4385b548ecef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
@@ -1,419 +1,224 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32I
-; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64I
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32D
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64D
+; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I
+; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV64
 
 define <vscale x 1 x i8> @ctlz_nxv1i8(<vscale x 1 x i8> %va) {
-; RV32I-LABEL: ctlz_nxv1i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_nxv1i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: ctlz_nxv1i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; RV32D-NEXT:    vzext.vf4 v9, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v9, v9
-; RV32D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v9, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v9, v9
-; RV32D-NEXT:    li a0, 134
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    vrsub.vx v8, v9, a0
-; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_nxv1i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vzext.vf4 v9, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v9, v9
-; RV64D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v9, v9, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v9, v9
-; RV64D-NEXT:    li a0, 134
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    vrsub.vx v8, v9, a0
-; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv1i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: ctlz_nxv1i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v9, v8
+; CHECK-D-NEXT:    vfcvt.f.xu.v v9, v9
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v9, v9, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
+; CHECK-D-NEXT:    li a0, 134
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    vrsub.vx v8, v9, a0
+; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8> %va, i1 false)
   ret <vscale x 1 x i8> %a
 }
 declare <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8>, i1)
 
 define <vscale x 2 x i8> @ctlz_nxv2i8(<vscale x 2 x i8> %va) {
-; RV32I-LABEL: ctlz_nxv2i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_nxv2i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: ctlz_nxv2i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV32D-NEXT:    vzext.vf4 v9, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v9, v9
-; RV32D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v9, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v9, v9
-; RV32D-NEXT:    li a0, 134
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    vrsub.vx v8, v9, a0
-; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_nxv2i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vzext.vf4 v9, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v9, v9
-; RV64D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v9, v9, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v9, v9
-; RV64D-NEXT:    li a0, 134
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    vrsub.vx v8, v9, a0
-; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv2i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: ctlz_nxv2i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v9, v8
+; CHECK-D-NEXT:    vfcvt.f.xu.v v9, v9
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v9, v9, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
+; CHECK-D-NEXT:    li a0, 134
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    vrsub.vx v8, v9, a0
+; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8> %va, i1 false)
   ret <vscale x 2 x i8> %a
 }
 declare <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8>, i1)
 
 define <vscale x 4 x i8> @ctlz_nxv4i8(<vscale x 4 x i8> %va) {
-; RV32I-LABEL: ctlz_nxv4i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_nxv4i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: ctlz_nxv4i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; RV32D-NEXT:    vzext.vf4 v10, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v10, v10
-; RV32D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v10, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v9, v9
-; RV32D-NEXT:    li a0, 134
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    vrsub.vx v8, v9, a0
-; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_nxv4i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vzext.vf4 v10, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v10, v10
-; RV64D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v9, v10, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v9, v9
-; RV64D-NEXT:    li a0, 134
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    vrsub.vx v8, v9, a0
-; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv4i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: ctlz_nxv4i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v10, v8
+; CHECK-D-NEXT:    vfcvt.f.xu.v v10, v10
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v9, v10, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
+; CHECK-D-NEXT:    li a0, 134
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    vrsub.vx v8, v9, a0
+; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8> %va, i1 false)
   ret <vscale x 4 x i8> %a
 }
 declare <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8>, i1)
 
 define <vscale x 8 x i8> @ctlz_nxv8i8(<vscale x 8 x i8> %va) {
-; RV32I-LABEL: ctlz_nxv8i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_nxv8i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: ctlz_nxv8i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; RV32D-NEXT:    vzext.vf4 v12, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v12, v12
-; RV32D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v10, v12, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v9, v10
-; RV32D-NEXT:    li a0, 134
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    vrsub.vx v8, v9, a0
-; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_nxv8i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vzext.vf4 v12, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v12, v12
-; RV64D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v10, v12, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v9, v10
-; RV64D-NEXT:    li a0, 134
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    vrsub.vx v8, v9, a0
-; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv8i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: ctlz_nxv8i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v12, v8
+; CHECK-D-NEXT:    vfcvt.f.xu.v v12, v12
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v10, v12, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v9, v10
+; CHECK-D-NEXT:    li a0, 134
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    vrsub.vx v8, v9, a0
+; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8> %va, i1 false)
   ret <vscale x 8 x i8> %a
 }
 declare <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8>, i1)
 
 define <vscale x 16 x i8> @ctlz_nxv16i8(<vscale x 16 x i8> %va) {
-; RV32I-LABEL: ctlz_nxv16i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v10, v10, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v10, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v10, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v10
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_nxv16i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v10, v10, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v10
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v10, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v10, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v10
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: ctlz_nxv16i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
-; RV32D-NEXT:    vzext.vf4 v16, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v16, v16
-; RV32D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v12, v16, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v10, v12
-; RV32D-NEXT:    li a0, 134
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    vrsub.vx v8, v10, a0
-; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_nxv16i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
-; RV64D-NEXT:    vzext.vf4 v16, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v16, v16
-; RV64D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v12, v16, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v10, v12
-; RV64D-NEXT:    li a0, 134
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    vrsub.vx v8, v10, a0
-; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_nxv16i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: ctlz_nxv16i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v16, v8
+; CHECK-D-NEXT:    vfcvt.f.xu.v v16, v16
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v12, v16, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v10, v12
+; CHECK-D-NEXT:    li a0, 134
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    vrsub.vx v8, v10, a0
+; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8> %va, i1 false)
   ret <vscale x 16 x i8> %a
 }
@@ -544,29 +349,17 @@ define <vscale x 1 x i16> @ctlz_nxv1i16(<vscale x 1 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: ctlz_nxv1i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT:    vnsrl.wi v9, v9, 23
-; RV32D-NEXT:    li a0, 142
-; RV32D-NEXT:    vrsub.vx v9, v9, a0
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_nxv1i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT:    vnsrl.wi v9, v9, 23
-; RV64D-NEXT:    li a0, 142
-; RV64D-NEXT:    vrsub.vx v9, v9, a0
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: ctlz_nxv1i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT:    vnsrl.wi v9, v9, 23
+; CHECK-D-NEXT:    li a0, 142
+; CHECK-D-NEXT:    vrsub.vx v9, v9, a0
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a0, 16
+; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16> %va, i1 false)
   ret <vscale x 1 x i16> %a
 }
@@ -639,29 +432,17 @@ define <vscale x 2 x i16> @ctlz_nxv2i16(<vscale x 2 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: ctlz_nxv2i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT:    vnsrl.wi v9, v9, 23
-; RV32D-NEXT:    li a0, 142
-; RV32D-NEXT:    vrsub.vx v9, v9, a0
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_nxv2i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT:    vnsrl.wi v9, v9, 23
-; RV64D-NEXT:    li a0, 142
-; RV64D-NEXT:    vrsub.vx v9, v9, a0
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: ctlz_nxv2i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT:    vnsrl.wi v9, v9, 23
+; CHECK-D-NEXT:    li a0, 142
+; CHECK-D-NEXT:    vrsub.vx v9, v9, a0
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a0, 16
+; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16> %va, i1 false)
   ret <vscale x 2 x i16> %a
 }
@@ -734,29 +515,17 @@ define <vscale x 4 x i16> @ctlz_nxv4i16(<vscale x 4 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: ctlz_nxv4i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vfwcvt.f.xu.v v10, v8
-; RV32D-NEXT:    vnsrl.wi v9, v10, 23
-; RV32D-NEXT:    li a0, 142
-; RV32D-NEXT:    vrsub.vx v9, v9, a0
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_nxv4i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vfwcvt.f.xu.v v10, v8
-; RV64D-NEXT:    vnsrl.wi v9, v10, 23
-; RV64D-NEXT:    li a0, 142
-; RV64D-NEXT:    vrsub.vx v9, v9, a0
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: ctlz_nxv4i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v8
+; CHECK-D-NEXT:    vnsrl.wi v9, v10, 23
+; CHECK-D-NEXT:    li a0, 142
+; CHECK-D-NEXT:    vrsub.vx v9, v9, a0
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a0, 16
+; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16> %va, i1 false)
   ret <vscale x 4 x i16> %a
 }
@@ -829,29 +598,17 @@ define <vscale x 8 x i16> @ctlz_nxv8i16(<vscale x 8 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: ctlz_nxv8i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vfwcvt.f.xu.v v12, v8
-; RV32D-NEXT:    vnsrl.wi v10, v12, 23
-; RV32D-NEXT:    li a0, 142
-; RV32D-NEXT:    vrsub.vx v10, v10, a0
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v10, a0, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_nxv8i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vfwcvt.f.xu.v v12, v8
-; RV64D-NEXT:    vnsrl.wi v10, v12, 23
-; RV64D-NEXT:    li a0, 142
-; RV64D-NEXT:    vrsub.vx v10, v10, a0
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v10, a0, v0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: ctlz_nxv8i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v12, v8
+; CHECK-D-NEXT:    vnsrl.wi v10, v12, 23
+; CHECK-D-NEXT:    li a0, 142
+; CHECK-D-NEXT:    vrsub.vx v10, v10, a0
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a0, 16
+; CHECK-D-NEXT:    vmerge.vxm v8, v10, a0, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16> %va, i1 false)
   ret <vscale x 8 x i16> %a
 }
@@ -924,29 +681,17 @@ define <vscale x 16 x i16> @ctlz_nxv16i16(<vscale x 16 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: ctlz_nxv16i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vfwcvt.f.xu.v v16, v8
-; RV32D-NEXT:    vnsrl.wi v12, v16, 23
-; RV32D-NEXT:    li a0, 142
-; RV32D-NEXT:    vrsub.vx v12, v12, a0
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v12, a0, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_nxv16i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vfwcvt.f.xu.v v16, v8
-; RV64D-NEXT:    vnsrl.wi v12, v16, 23
-; RV64D-NEXT:    li a0, 142
-; RV64D-NEXT:    vrsub.vx v12, v12, a0
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v12, a0, v0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: ctlz_nxv16i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v16, v8
+; CHECK-D-NEXT:    vnsrl.wi v12, v16, 23
+; CHECK-D-NEXT:    li a0, 142
+; CHECK-D-NEXT:    vrsub.vx v12, v12, a0
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a0, 16
+; CHECK-D-NEXT:    vmerge.vxm v8, v12, a0, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16> %va, i1 false)
   ret <vscale x 16 x i16> %a
 }
@@ -1096,37 +841,21 @@ define <vscale x 1 x i32> @ctlz_nxv1i32(<vscale x 1 x i32> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: ctlz_nxv1i32:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; RV32D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT:    li a0, 52
-; RV32D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV32D-NEXT:    vsrl.vx v9, v9, a0
-; RV32D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v9, v9
-; RV32D-NEXT:    li a0, 1054
-; RV32D-NEXT:    vrsub.vx v9, v9, a0
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    li a0, 32
-; RV32D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_nxv1i32:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT:    li a0, 52
-; RV64D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV64D-NEXT:    vsrl.vx v9, v9, a0
-; RV64D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v9, v9
-; RV64D-NEXT:    li a0, 1054
-; RV64D-NEXT:    vrsub.vx v9, v9, a0
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    li a0, 32
-; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: ctlz_nxv1i32:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT:    li a0, 52
+; CHECK-D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; CHECK-D-NEXT:    vsrl.vx v9, v9, a0
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
+; CHECK-D-NEXT:    li a0, 1054
+; CHECK-D-NEXT:    vrsub.vx v9, v9, a0
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a0, 32
+; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32> %va, i1 false)
   ret <vscale x 1 x i32> %a
 }
@@ -1205,37 +934,21 @@ define <vscale x 2 x i32> @ctlz_nxv2i32(<vscale x 2 x i32> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: ctlz_nxv2i32:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV32D-NEXT:    vfwcvt.f.xu.v v10, v8
-; RV32D-NEXT:    li a0, 52
-; RV32D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32D-NEXT:    vsrl.vx v10, v10, a0
-; RV32D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v9, v10
-; RV32D-NEXT:    li a0, 1054
-; RV32D-NEXT:    vrsub.vx v9, v9, a0
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    li a0, 32
-; RV32D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_nxv2i32:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vfwcvt.f.xu.v v10, v8
-; RV64D-NEXT:    li a0, 52
-; RV64D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64D-NEXT:    vsrl.vx v10, v10, a0
-; RV64D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v9, v10
-; RV64D-NEXT:    li a0, 1054
-; RV64D-NEXT:    vrsub.vx v9, v9, a0
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    li a0, 32
-; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: ctlz_nxv2i32:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v8
+; CHECK-D-NEXT:    li a0, 52
+; CHECK-D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-D-NEXT:    vsrl.vx v10, v10, a0
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v9, v10
+; CHECK-D-NEXT:    li a0, 1054
+; CHECK-D-NEXT:    vrsub.vx v9, v9, a0
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a0, 32
+; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32> %va, i1 false)
   ret <vscale x 2 x i32> %a
 }
@@ -1314,37 +1027,21 @@ define <vscale x 4 x i32> @ctlz_nxv4i32(<vscale x 4 x i32> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: ctlz_nxv4i32:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; RV32D-NEXT:    vfwcvt.f.xu.v v12, v8
-; RV32D-NEXT:    li a0, 52
-; RV32D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32D-NEXT:    vsrl.vx v12, v12, a0
-; RV32D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v10, v12
-; RV32D-NEXT:    li a0, 1054
-; RV32D-NEXT:    vrsub.vx v10, v10, a0
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    li a0, 32
-; RV32D-NEXT:    vmerge.vxm v8, v10, a0, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_nxv4i32:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vfwcvt.f.xu.v v12, v8
-; RV64D-NEXT:    li a0, 52
-; RV64D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64D-NEXT:    vsrl.vx v12, v12, a0
-; RV64D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v10, v12
-; RV64D-NEXT:    li a0, 1054
-; RV64D-NEXT:    vrsub.vx v10, v10, a0
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    li a0, 32
-; RV64D-NEXT:    vmerge.vxm v8, v10, a0, v0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: ctlz_nxv4i32:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v12, v8
+; CHECK-D-NEXT:    li a0, 52
+; CHECK-D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK-D-NEXT:    vsrl.vx v12, v12, a0
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v10, v12
+; CHECK-D-NEXT:    li a0, 1054
+; CHECK-D-NEXT:    vrsub.vx v10, v10, a0
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a0, 32
+; CHECK-D-NEXT:    vmerge.vxm v8, v10, a0, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %va, i1 false)
   ret <vscale x 4 x i32> %a
 }
@@ -1423,37 +1120,21 @@ define <vscale x 8 x i32> @ctlz_nxv8i32(<vscale x 8 x i32> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: ctlz_nxv8i32:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; RV32D-NEXT:    vfwcvt.f.xu.v v16, v8
-; RV32D-NEXT:    li a0, 52
-; RV32D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32D-NEXT:    vsrl.vx v16, v16, a0
-; RV32D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v12, v16
-; RV32D-NEXT:    li a0, 1054
-; RV32D-NEXT:    vrsub.vx v12, v12, a0
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    li a0, 32
-; RV32D-NEXT:    vmerge.vxm v8, v12, a0, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_nxv8i32:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vfwcvt.f.xu.v v16, v8
-; RV64D-NEXT:    li a0, 52
-; RV64D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64D-NEXT:    vsrl.vx v16, v16, a0
-; RV64D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v12, v16
-; RV64D-NEXT:    li a0, 1054
-; RV64D-NEXT:    vrsub.vx v12, v12, a0
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    li a0, 32
-; RV64D-NEXT:    vmerge.vxm v8, v12, a0, v0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: ctlz_nxv8i32:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v16, v8
+; CHECK-D-NEXT:    li a0, 52
+; CHECK-D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-D-NEXT:    vsrl.vx v16, v16, a0
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v12, v16
+; CHECK-D-NEXT:    li a0, 1054
+; CHECK-D-NEXT:    vrsub.vx v12, v12, a0
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a0, 32
+; CHECK-D-NEXT:    vmerge.vxm v8, v12, a0, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32> %va, i1 false)
   ret <vscale x 8 x i32> %a
 }
@@ -1953,391 +1634,206 @@ define <vscale x 8 x i64> @ctlz_nxv8i64(<vscale x 8 x i64> %va) {
 declare <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64>, i1)
 
 define <vscale x 1 x i8> @ctlz_zero_undef_nxv1i8(<vscale x 1 x i8> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv1i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv1i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: ctlz_zero_undef_nxv1i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; RV32D-NEXT:    vzext.vf4 v9, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v9
-; RV32D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v8
-; RV32D-NEXT:    li a0, 134
-; RV32D-NEXT:    vrsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv1i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vzext.vf4 v9, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v9
-; RV64D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v8
-; RV64D-NEXT:    li a0, 134
-; RV64D-NEXT:    vrsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv1i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: ctlz_zero_undef_nxv1i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v9, v8
+; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v9
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-D-NEXT:    li a0, 134
+; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8> %va, i1 true)
   ret <vscale x 1 x i8> %a
 }
 
 define <vscale x 2 x i8> @ctlz_zero_undef_nxv2i8(<vscale x 2 x i8> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv2i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv2i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: ctlz_zero_undef_nxv2i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV32D-NEXT:    vzext.vf4 v9, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v9
-; RV32D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v8
-; RV32D-NEXT:    li a0, 134
-; RV32D-NEXT:    vrsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv2i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vzext.vf4 v9, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v9
-; RV64D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v8
-; RV64D-NEXT:    li a0, 134
-; RV64D-NEXT:    vrsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv2i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: ctlz_zero_undef_nxv2i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v9, v8
+; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v9
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-D-NEXT:    li a0, 134
+; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8> %va, i1 true)
   ret <vscale x 2 x i8> %a
 }
 
 define <vscale x 4 x i8> @ctlz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv4i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv4i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: ctlz_zero_undef_nxv4i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; RV32D-NEXT:    vzext.vf4 v10, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v10
-; RV32D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v10, v8, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v10
-; RV32D-NEXT:    li a0, 134
-; RV32D-NEXT:    vrsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv4i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vzext.vf4 v10, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v10
-; RV64D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v10, v8, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v10
-; RV64D-NEXT:    li a0, 134
-; RV64D-NEXT:    vrsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv4i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: ctlz_zero_undef_nxv4i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v10, v8
+; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v10
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v10, v8, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v8, v10
+; CHECK-D-NEXT:    li a0, 134
+; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8> %va, i1 true)
   ret <vscale x 4 x i8> %a
 }
 
 define <vscale x 8 x i8> @ctlz_zero_undef_nxv8i8(<vscale x 8 x i8> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv8i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v9
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv8i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v9
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: ctlz_zero_undef_nxv8i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; RV32D-NEXT:    vzext.vf4 v12, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v12
-; RV32D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v12, v8, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v12
-; RV32D-NEXT:    li a0, 134
-; RV32D-NEXT:    vrsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv8i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vzext.vf4 v12, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v12
-; RV64D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v12, v8, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v12
-; RV64D-NEXT:    li a0, 134
-; RV64D-NEXT:    vrsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv8i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: ctlz_zero_undef_nxv8i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v12, v8
+; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v12
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v12, v8, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v8, v12
+; CHECK-D-NEXT:    li a0, 134
+; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8> %va, i1 true)
   ret <vscale x 8 x i8> %a
 }
 
 define <vscale x 16 x i8> @ctlz_zero_undef_nxv16i8(<vscale x 16 x i8> %va) {
-; RV32I-LABEL: ctlz_zero_undef_nxv16i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 2
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vor.vv v8, v8, v10
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v10, v10, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v10, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v10, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v10
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: ctlz_zero_undef_nxv16i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 2
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vor.vv v8, v8, v10
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v10, v10, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v10
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v10, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v10, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v10
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: ctlz_zero_undef_nxv16i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
-; RV32D-NEXT:    vzext.vf4 v16, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v16
-; RV32D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v16, v8, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v16
-; RV32D-NEXT:    li a0, 134
-; RV32D-NEXT:    vrsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv16i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
-; RV64D-NEXT:    vzext.vf4 v16, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v16
-; RV64D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v16, v8, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v16
-; RV64D-NEXT:    li a0, 134
-; RV64D-NEXT:    vrsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv16i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 2
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: ctlz_zero_undef_nxv16i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v16, v8
+; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v16
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v16, v8, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v8, v16
+; CHECK-D-NEXT:    li a0, 134
+; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8> %va, i1 true)
   ret <vscale x 16 x i8> %a
 }
@@ -2465,23 +1961,14 @@ define <vscale x 1 x i16> @ctlz_zero_undef_nxv1i16(<vscale x 1 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: ctlz_zero_undef_nxv1i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT:    vnsrl.wi v8, v9, 23
-; RV32D-NEXT:    li a0, 142
-; RV32D-NEXT:    vrsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv1i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT:    vnsrl.wi v8, v9, 23
-; RV64D-NEXT:    li a0, 142
-; RV64D-NEXT:    vrsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: ctlz_zero_undef_nxv1i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT:    vnsrl.wi v8, v9, 23
+; CHECK-D-NEXT:    li a0, 142
+; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16> %va, i1 true)
   ret <vscale x 1 x i16> %a
 }
@@ -2553,23 +2040,14 @@ define <vscale x 2 x i16> @ctlz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: ctlz_zero_undef_nxv2i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT:    vnsrl.wi v8, v9, 23
-; RV32D-NEXT:    li a0, 142
-; RV32D-NEXT:    vrsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv2i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT:    vnsrl.wi v8, v9, 23
-; RV64D-NEXT:    li a0, 142
-; RV64D-NEXT:    vrsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: ctlz_zero_undef_nxv2i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT:    vnsrl.wi v8, v9, 23
+; CHECK-D-NEXT:    li a0, 142
+; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16> %va, i1 true)
   ret <vscale x 2 x i16> %a
 }
@@ -2641,23 +2119,14 @@ define <vscale x 4 x i16> @ctlz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: ctlz_zero_undef_nxv4i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vfwcvt.f.xu.v v10, v8
-; RV32D-NEXT:    vnsrl.wi v8, v10, 23
-; RV32D-NEXT:    li a0, 142
-; RV32D-NEXT:    vrsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv4i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vfwcvt.f.xu.v v10, v8
-; RV64D-NEXT:    vnsrl.wi v8, v10, 23
-; RV64D-NEXT:    li a0, 142
-; RV64D-NEXT:    vrsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: ctlz_zero_undef_nxv4i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v8
+; CHECK-D-NEXT:    vnsrl.wi v8, v10, 23
+; CHECK-D-NEXT:    li a0, 142
+; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16> %va, i1 true)
   ret <vscale x 4 x i16> %a
 }
@@ -2729,23 +2198,14 @@ define <vscale x 8 x i16> @ctlz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: ctlz_zero_undef_nxv8i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vfwcvt.f.xu.v v12, v8
-; RV32D-NEXT:    vnsrl.wi v8, v12, 23
-; RV32D-NEXT:    li a0, 142
-; RV32D-NEXT:    vrsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv8i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vfwcvt.f.xu.v v12, v8
-; RV64D-NEXT:    vnsrl.wi v8, v12, 23
-; RV64D-NEXT:    li a0, 142
-; RV64D-NEXT:    vrsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: ctlz_zero_undef_nxv8i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v12, v8
+; CHECK-D-NEXT:    vnsrl.wi v8, v12, 23
+; CHECK-D-NEXT:    li a0, 142
+; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16> %va, i1 true)
   ret <vscale x 8 x i16> %a
 }
@@ -2817,23 +2277,14 @@ define <vscale x 16 x i16> @ctlz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: ctlz_zero_undef_nxv16i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vfwcvt.f.xu.v v16, v8
-; RV32D-NEXT:    vnsrl.wi v8, v16, 23
-; RV32D-NEXT:    li a0, 142
-; RV32D-NEXT:    vrsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv16i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vfwcvt.f.xu.v v16, v8
-; RV64D-NEXT:    vnsrl.wi v8, v16, 23
-; RV64D-NEXT:    li a0, 142
-; RV64D-NEXT:    vrsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: ctlz_zero_undef_nxv16i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v16, v8
+; CHECK-D-NEXT:    vnsrl.wi v8, v16, 23
+; CHECK-D-NEXT:    li a0, 142
+; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16> %va, i1 true)
   ret <vscale x 16 x i16> %a
 }
@@ -2981,31 +2432,18 @@ define <vscale x 1 x i32> @ctlz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: ctlz_zero_undef_nxv1i32:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; RV32D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT:    li a0, 52
-; RV32D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV32D-NEXT:    vsrl.vx v8, v9, a0
-; RV32D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v8
-; RV32D-NEXT:    li a0, 1054
-; RV32D-NEXT:    vrsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv1i32:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT:    li a0, 52
-; RV64D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV64D-NEXT:    vsrl.vx v8, v9, a0
-; RV64D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v8
-; RV64D-NEXT:    li a0, 1054
-; RV64D-NEXT:    vrsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: ctlz_zero_undef_nxv1i32:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT:    li a0, 52
+; CHECK-D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; CHECK-D-NEXT:    vsrl.vx v8, v9, a0
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-D-NEXT:    li a0, 1054
+; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32> %va, i1 true)
   ret <vscale x 1 x i32> %a
 }
@@ -3083,31 +2521,18 @@ define <vscale x 2 x i32> @ctlz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: ctlz_zero_undef_nxv2i32:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV32D-NEXT:    vfwcvt.f.xu.v v10, v8
-; RV32D-NEXT:    li a0, 52
-; RV32D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32D-NEXT:    vsrl.vx v8, v10, a0
-; RV32D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v10, v8
-; RV32D-NEXT:    li a0, 1054
-; RV32D-NEXT:    vrsub.vx v8, v10, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv2i32:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vfwcvt.f.xu.v v10, v8
-; RV64D-NEXT:    li a0, 52
-; RV64D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64D-NEXT:    vsrl.vx v8, v10, a0
-; RV64D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v10, v8
-; RV64D-NEXT:    li a0, 1054
-; RV64D-NEXT:    vrsub.vx v8, v10, a0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: ctlz_zero_undef_nxv2i32:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v8
+; CHECK-D-NEXT:    li a0, 52
+; CHECK-D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-D-NEXT:    vsrl.vx v8, v10, a0
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v10, v8
+; CHECK-D-NEXT:    li a0, 1054
+; CHECK-D-NEXT:    vrsub.vx v8, v10, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32> %va, i1 true)
   ret <vscale x 2 x i32> %a
 }
@@ -3185,31 +2610,18 @@ define <vscale x 4 x i32> @ctlz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: ctlz_zero_undef_nxv4i32:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; RV32D-NEXT:    vfwcvt.f.xu.v v12, v8
-; RV32D-NEXT:    li a0, 52
-; RV32D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32D-NEXT:    vsrl.vx v8, v12, a0
-; RV32D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v12, v8
-; RV32D-NEXT:    li a0, 1054
-; RV32D-NEXT:    vrsub.vx v8, v12, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv4i32:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vfwcvt.f.xu.v v12, v8
-; RV64D-NEXT:    li a0, 52
-; RV64D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64D-NEXT:    vsrl.vx v8, v12, a0
-; RV64D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v12, v8
-; RV64D-NEXT:    li a0, 1054
-; RV64D-NEXT:    vrsub.vx v8, v12, a0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: ctlz_zero_undef_nxv4i32:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v12, v8
+; CHECK-D-NEXT:    li a0, 52
+; CHECK-D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK-D-NEXT:    vsrl.vx v8, v12, a0
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v12, v8
+; CHECK-D-NEXT:    li a0, 1054
+; CHECK-D-NEXT:    vrsub.vx v8, v12, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %va, i1 true)
   ret <vscale x 4 x i32> %a
 }
@@ -3287,31 +2699,18 @@ define <vscale x 8 x i32> @ctlz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: ctlz_zero_undef_nxv8i32:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; RV32D-NEXT:    vfwcvt.f.xu.v v16, v8
-; RV32D-NEXT:    li a0, 52
-; RV32D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32D-NEXT:    vsrl.vx v8, v16, a0
-; RV32D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v16, v8
-; RV32D-NEXT:    li a0, 1054
-; RV32D-NEXT:    vrsub.vx v8, v16, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: ctlz_zero_undef_nxv8i32:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vfwcvt.f.xu.v v16, v8
-; RV64D-NEXT:    li a0, 52
-; RV64D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64D-NEXT:    vsrl.vx v8, v16, a0
-; RV64D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v16, v8
-; RV64D-NEXT:    li a0, 1054
-; RV64D-NEXT:    vrsub.vx v8, v16, a0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: ctlz_zero_undef_nxv8i32:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v16, v8
+; CHECK-D-NEXT:    li a0, 52
+; CHECK-D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-D-NEXT:    vsrl.vx v8, v16, a0
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v16, v8
+; CHECK-D-NEXT:    li a0, 1054
+; CHECK-D-NEXT:    vrsub.vx v8, v16, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32> %va, i1 true)
   ret <vscale x 8 x i32> %a
 }

diff  --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
index 9f5a503f56d94..a74e0b865d1ef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -1,419 +1,224 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32I
-; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64I
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32D
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64D
+; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I
+; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV64
 
 define <vscale x 1 x i8> @cttz_nxv1i8(<vscale x 1 x i8> %va) {
-; RV32I-LABEL: cttz_nxv1i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_nxv1i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
-; RV64I-NEXT:    vsub.vx v9, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: cttz_nxv1i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
-; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v9, v8, v9
-; RV32D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32D-NEXT:    vzext.vf4 v10, v9
-; RV32D-NEXT:    vfcvt.f.xu.v v9, v10
-; RV32D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v9, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v9, v9
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    vsub.vx v8, v9, a0
-; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_nxv1i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
-; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v9, v8, v9
-; RV64D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vzext.vf4 v10, v9
-; RV64D-NEXT:    vfcvt.f.xu.v v9, v10
-; RV64D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v9, v9, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v9, v9
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    vsub.vx v8, v9, a0
-; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_nxv1i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: cttz_nxv1i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-D-NEXT:    vand.vv v9, v8, v9
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v10, v9
+; CHECK-D-NEXT:    vfcvt.f.xu.v v9, v10
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v9, v9, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    vsub.vx v8, v9, a0
+; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 false)
   ret <vscale x 1 x i8> %a
 }
 declare <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8>, i1)
 
 define <vscale x 2 x i8> @cttz_nxv2i8(<vscale x 2 x i8> %va) {
-; RV32I-LABEL: cttz_nxv2i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_nxv2i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
-; RV64I-NEXT:    vsub.vx v9, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: cttz_nxv2i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v9, v8, v9
-; RV32D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV32D-NEXT:    vzext.vf4 v10, v9
-; RV32D-NEXT:    vfcvt.f.xu.v v9, v10
-; RV32D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v9, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v9, v9
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    vsub.vx v8, v9, a0
-; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_nxv2i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v9, v8, v9
-; RV64D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vzext.vf4 v10, v9
-; RV64D-NEXT:    vfcvt.f.xu.v v9, v10
-; RV64D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v9, v9, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v9, v9
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    vsub.vx v8, v9, a0
-; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_nxv2i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: cttz_nxv2i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-D-NEXT:    vand.vv v9, v8, v9
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v10, v9
+; CHECK-D-NEXT:    vfcvt.f.xu.v v9, v10
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v9, v9, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    vsub.vx v8, v9, a0
+; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 false)
   ret <vscale x 2 x i8> %a
 }
 declare <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8>, i1)
 
 define <vscale x 4 x i8> @cttz_nxv4i8(<vscale x 4 x i8> %va) {
-; RV32I-LABEL: cttz_nxv4i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_nxv4i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
-; RV64I-NEXT:    vsub.vx v9, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: cttz_nxv4i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
-; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v9, v8, v9
-; RV32D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32D-NEXT:    vzext.vf4 v10, v9
-; RV32D-NEXT:    vfcvt.f.xu.v v10, v10
-; RV32D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v10, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v9, v9
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    vsub.vx v8, v9, a0
-; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_nxv4i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
-; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v9, v8, v9
-; RV64D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vzext.vf4 v10, v9
-; RV64D-NEXT:    vfcvt.f.xu.v v10, v10
-; RV64D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v9, v10, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v9, v9
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    vsub.vx v8, v9, a0
-; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_nxv4i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: cttz_nxv4i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-D-NEXT:    vand.vv v9, v8, v9
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v10, v9
+; CHECK-D-NEXT:    vfcvt.f.xu.v v10, v10
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v9, v10, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    vsub.vx v8, v9, a0
+; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 false)
   ret <vscale x 4 x i8> %a
 }
 declare <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8>, i1)
 
 define <vscale x 8 x i8> @cttz_nxv8i8(<vscale x 8 x i8> %va) {
-; RV32I-LABEL: cttz_nxv8i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_nxv8i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
-; RV64I-NEXT:    vsub.vx v9, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: cttz_nxv8i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
-; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v9, v8, v9
-; RV32D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32D-NEXT:    vzext.vf4 v12, v9
-; RV32D-NEXT:    vfcvt.f.xu.v v12, v12
-; RV32D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v10, v12, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v9, v10
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    vsub.vx v8, v9, a0
-; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_nxv8i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
-; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v9, v8, v9
-; RV64D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vzext.vf4 v12, v9
-; RV64D-NEXT:    vfcvt.f.xu.v v12, v12
-; RV64D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v10, v12, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v9, v10
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    vsub.vx v8, v9, a0
-; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_nxv8i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: cttz_nxv8i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-D-NEXT:    vand.vv v9, v8, v9
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v12, v9
+; CHECK-D-NEXT:    vfcvt.f.xu.v v12, v12
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v10, v12, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v9, v10
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    vsub.vx v8, v9, a0
+; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 false)
   ret <vscale x 8 x i8> %a
 }
 declare <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8>, i1)
 
 define <vscale x 16 x i8> @cttz_nxv16i8(<vscale x 16 x i8> %va) {
-; RV32I-LABEL: cttz_nxv16i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
-; RV32I-NEXT:    vsub.vx v10, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v10, v10, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v10, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v10, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v10
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_nxv16i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
-; RV64I-NEXT:    vsub.vx v10, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v10, v10, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v10
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v10, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v10, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v10
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: cttz_nxv16i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
-; RV32D-NEXT:    vrsub.vi v10, v8, 0
-; RV32D-NEXT:    vand.vv v10, v8, v10
-; RV32D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; RV32D-NEXT:    vzext.vf4 v16, v10
-; RV32D-NEXT:    vfcvt.f.xu.v v16, v16
-; RV32D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v12, v16, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v10, v12
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    vsub.vx v8, v10, a0
-; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_nxv16i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
-; RV64D-NEXT:    vrsub.vi v10, v8, 0
-; RV64D-NEXT:    vand.vv v10, v8, v10
-; RV64D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; RV64D-NEXT:    vzext.vf4 v16, v10
-; RV64D-NEXT:    vfcvt.f.xu.v v16, v16
-; RV64D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v12, v16, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v10, v12
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    vsub.vx v8, v10, a0
-; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_nxv16i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
+; CHECK-ZVE64X-NEXT:    vsub.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: cttz_nxv16i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v10, v8, 0
+; CHECK-D-NEXT:    vand.vv v10, v8, v10
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v16, v10
+; CHECK-D-NEXT:    vfcvt.f.xu.v v16, v16
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v12, v16, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v10, v12
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    vsub.vx v8, v10, a0
+; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 false)
   ret <vscale x 16 x i8> %a
 }
@@ -528,33 +333,19 @@ define <vscale x 1 x i16> @cttz_nxv1i16(<vscale x 1 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: cttz_nxv1i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v9, v8, v9
-; RV32D-NEXT:    vfwcvt.f.xu.v v10, v9
-; RV32D-NEXT:    vnsrl.wi v9, v10, 23
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v9, v9, a0
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_nxv1i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v9, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v10, v9
-; RV64D-NEXT:    vnsrl.wi v9, v10, 23
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v9, v9, a0
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: cttz_nxv1i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-D-NEXT:    vand.vv v9, v8, v9
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v9
+; CHECK-D-NEXT:    vnsrl.wi v9, v10, 23
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vsub.vx v9, v9, a0
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a0, 16
+; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 false)
   ret <vscale x 1 x i16> %a
 }
@@ -617,33 +408,19 @@ define <vscale x 2 x i16> @cttz_nxv2i16(<vscale x 2 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: cttz_nxv2i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v9, v8, v9
-; RV32D-NEXT:    vfwcvt.f.xu.v v10, v9
-; RV32D-NEXT:    vnsrl.wi v9, v10, 23
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v9, v9, a0
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_nxv2i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v9, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v10, v9
-; RV64D-NEXT:    vnsrl.wi v9, v10, 23
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v9, v9, a0
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: cttz_nxv2i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-D-NEXT:    vand.vv v9, v8, v9
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v9
+; CHECK-D-NEXT:    vnsrl.wi v9, v10, 23
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vsub.vx v9, v9, a0
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a0, 16
+; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 false)
   ret <vscale x 2 x i16> %a
 }
@@ -706,33 +483,19 @@ define <vscale x 4 x i16> @cttz_nxv4i16(<vscale x 4 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: cttz_nxv4i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v9, v8, v9
-; RV32D-NEXT:    vfwcvt.f.xu.v v10, v9
-; RV32D-NEXT:    vnsrl.wi v9, v10, 23
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v9, v9, a0
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_nxv4i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v9, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v10, v9
-; RV64D-NEXT:    vnsrl.wi v9, v10, 23
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v9, v9, a0
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: cttz_nxv4i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-D-NEXT:    vand.vv v9, v8, v9
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v9
+; CHECK-D-NEXT:    vnsrl.wi v9, v10, 23
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vsub.vx v9, v9, a0
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a0, 16
+; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 false)
   ret <vscale x 4 x i16> %a
 }
@@ -795,33 +558,19 @@ define <vscale x 8 x i16> @cttz_nxv8i16(<vscale x 8 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: cttz_nxv8i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vrsub.vi v10, v8, 0
-; RV32D-NEXT:    vand.vv v10, v8, v10
-; RV32D-NEXT:    vfwcvt.f.xu.v v12, v10
-; RV32D-NEXT:    vnsrl.wi v10, v12, 23
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v10, v10, a0
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v10, a0, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_nxv8i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vrsub.vi v10, v8, 0
-; RV64D-NEXT:    vand.vv v10, v8, v10
-; RV64D-NEXT:    vfwcvt.f.xu.v v12, v10
-; RV64D-NEXT:    vnsrl.wi v10, v12, 23
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v10, v10, a0
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v10, a0, v0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: cttz_nxv8i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v10, v8, 0
+; CHECK-D-NEXT:    vand.vv v10, v8, v10
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v12, v10
+; CHECK-D-NEXT:    vnsrl.wi v10, v12, 23
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vsub.vx v10, v10, a0
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a0, 16
+; CHECK-D-NEXT:    vmerge.vxm v8, v10, a0, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 false)
   ret <vscale x 8 x i16> %a
 }
@@ -884,33 +633,19 @@ define <vscale x 16 x i16> @cttz_nxv16i16(<vscale x 16 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: cttz_nxv16i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vrsub.vi v12, v8, 0
-; RV32D-NEXT:    vand.vv v12, v8, v12
-; RV32D-NEXT:    vfwcvt.f.xu.v v16, v12
-; RV32D-NEXT:    vnsrl.wi v12, v16, 23
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v12, v12, a0
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v12, a0, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_nxv16i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vrsub.vi v12, v8, 0
-; RV64D-NEXT:    vand.vv v12, v8, v12
-; RV64D-NEXT:    vfwcvt.f.xu.v v16, v12
-; RV64D-NEXT:    vnsrl.wi v12, v16, 23
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v12, v12, a0
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v12, a0, v0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: cttz_nxv16i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v12, v8, 0
+; CHECK-D-NEXT:    vand.vv v12, v8, v12
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v16, v12
+; CHECK-D-NEXT:    vnsrl.wi v12, v16, 23
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vsub.vx v12, v12, a0
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a0, 16
+; CHECK-D-NEXT:    vmerge.vxm v8, v12, a0, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 false)
   ret <vscale x 16 x i16> %a
 }
@@ -1036,41 +771,23 @@ define <vscale x 1 x i32> @cttz_nxv1i32(<vscale x 1 x i32> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: cttz_nxv1i32:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v9, v8, v9
-; RV32D-NEXT:    vfwcvt.f.xu.v v10, v9
-; RV32D-NEXT:    li a0, 52
-; RV32D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV32D-NEXT:    vsrl.vx v9, v10, a0
-; RV32D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v9, v9
-; RV32D-NEXT:    li a0, 1023
-; RV32D-NEXT:    vsub.vx v9, v9, a0
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    li a0, 32
-; RV32D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_nxv1i32:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v9, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v10, v9
-; RV64D-NEXT:    li a0, 52
-; RV64D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV64D-NEXT:    vsrl.vx v9, v10, a0
-; RV64D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v9, v9
-; RV64D-NEXT:    li a0, 1023
-; RV64D-NEXT:    vsub.vx v9, v9, a0
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    li a0, 32
-; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: cttz_nxv1i32:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-D-NEXT:    vand.vv v9, v8, v9
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v9
+; CHECK-D-NEXT:    li a0, 52
+; CHECK-D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; CHECK-D-NEXT:    vsrl.vx v9, v10, a0
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
+; CHECK-D-NEXT:    li a0, 1023
+; CHECK-D-NEXT:    vsub.vx v9, v9, a0
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a0, 32
+; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 false)
   ret <vscale x 1 x i32> %a
 }
@@ -1135,41 +852,23 @@ define <vscale x 2 x i32> @cttz_nxv2i32(<vscale x 2 x i32> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: cttz_nxv2i32:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v9, v8, v9
-; RV32D-NEXT:    vfwcvt.f.xu.v v10, v9
-; RV32D-NEXT:    li a0, 52
-; RV32D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32D-NEXT:    vsrl.vx v10, v10, a0
-; RV32D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v9, v10
-; RV32D-NEXT:    li a0, 1023
-; RV32D-NEXT:    vsub.vx v9, v9, a0
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    li a0, 32
-; RV32D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_nxv2i32:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v9, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v10, v9
-; RV64D-NEXT:    li a0, 52
-; RV64D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64D-NEXT:    vsrl.vx v10, v10, a0
-; RV64D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v9, v10
-; RV64D-NEXT:    li a0, 1023
-; RV64D-NEXT:    vsub.vx v9, v9, a0
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    li a0, 32
-; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: cttz_nxv2i32:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-D-NEXT:    vand.vv v9, v8, v9
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v9
+; CHECK-D-NEXT:    li a0, 52
+; CHECK-D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-D-NEXT:    vsrl.vx v10, v10, a0
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v9, v10
+; CHECK-D-NEXT:    li a0, 1023
+; CHECK-D-NEXT:    vsub.vx v9, v9, a0
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a0, 32
+; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 false)
   ret <vscale x 2 x i32> %a
 }
@@ -1234,41 +933,23 @@ define <vscale x 4 x i32> @cttz_nxv4i32(<vscale x 4 x i32> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: cttz_nxv4i32:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; RV32D-NEXT:    vrsub.vi v10, v8, 0
-; RV32D-NEXT:    vand.vv v10, v8, v10
-; RV32D-NEXT:    vfwcvt.f.xu.v v12, v10
-; RV32D-NEXT:    li a0, 52
-; RV32D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32D-NEXT:    vsrl.vx v12, v12, a0
-; RV32D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v10, v12
-; RV32D-NEXT:    li a0, 1023
-; RV32D-NEXT:    vsub.vx v10, v10, a0
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    li a0, 32
-; RV32D-NEXT:    vmerge.vxm v8, v10, a0, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_nxv4i32:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vrsub.vi v10, v8, 0
-; RV64D-NEXT:    vand.vv v10, v8, v10
-; RV64D-NEXT:    vfwcvt.f.xu.v v12, v10
-; RV64D-NEXT:    li a0, 52
-; RV64D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64D-NEXT:    vsrl.vx v12, v12, a0
-; RV64D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v10, v12
-; RV64D-NEXT:    li a0, 1023
-; RV64D-NEXT:    vsub.vx v10, v10, a0
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    li a0, 32
-; RV64D-NEXT:    vmerge.vxm v8, v10, a0, v0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: cttz_nxv4i32:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v10, v8, 0
+; CHECK-D-NEXT:    vand.vv v10, v8, v10
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v12, v10
+; CHECK-D-NEXT:    li a0, 52
+; CHECK-D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK-D-NEXT:    vsrl.vx v12, v12, a0
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v10, v12
+; CHECK-D-NEXT:    li a0, 1023
+; CHECK-D-NEXT:    vsub.vx v10, v10, a0
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a0, 32
+; CHECK-D-NEXT:    vmerge.vxm v8, v10, a0, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 false)
   ret <vscale x 4 x i32> %a
 }
@@ -1333,41 +1014,23 @@ define <vscale x 8 x i32> @cttz_nxv8i32(<vscale x 8 x i32> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: cttz_nxv8i32:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; RV32D-NEXT:    vrsub.vi v12, v8, 0
-; RV32D-NEXT:    vand.vv v12, v8, v12
-; RV32D-NEXT:    vfwcvt.f.xu.v v16, v12
-; RV32D-NEXT:    li a0, 52
-; RV32D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32D-NEXT:    vsrl.vx v16, v16, a0
-; RV32D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v12, v16
-; RV32D-NEXT:    li a0, 1023
-; RV32D-NEXT:    vsub.vx v12, v12, a0
-; RV32D-NEXT:    vmseq.vi v0, v8, 0
-; RV32D-NEXT:    li a0, 32
-; RV32D-NEXT:    vmerge.vxm v8, v12, a0, v0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_nxv8i32:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vrsub.vi v12, v8, 0
-; RV64D-NEXT:    vand.vv v12, v8, v12
-; RV64D-NEXT:    vfwcvt.f.xu.v v16, v12
-; RV64D-NEXT:    li a0, 52
-; RV64D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64D-NEXT:    vsrl.vx v16, v16, a0
-; RV64D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v12, v16
-; RV64D-NEXT:    li a0, 1023
-; RV64D-NEXT:    vsub.vx v12, v12, a0
-; RV64D-NEXT:    vmseq.vi v0, v8, 0
-; RV64D-NEXT:    li a0, 32
-; RV64D-NEXT:    vmerge.vxm v8, v12, a0, v0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: cttz_nxv8i32:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v12, v8, 0
+; CHECK-D-NEXT:    vand.vv v12, v8, v12
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v16, v12
+; CHECK-D-NEXT:    li a0, 52
+; CHECK-D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-D-NEXT:    vsrl.vx v16, v16, a0
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v12, v16
+; CHECK-D-NEXT:    li a0, 1023
+; CHECK-D-NEXT:    vsub.vx v12, v12, a0
+; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-D-NEXT:    li a0, 32
+; CHECK-D-NEXT:    vmerge.vxm v8, v12, a0, v0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 false)
   ret <vscale x 8 x i32> %a
 }
@@ -1773,391 +1436,206 @@ define <vscale x 8 x i64> @cttz_nxv8i64(<vscale x 8 x i64> %va) {
 declare <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64>, i1)
 
 define <vscale x 1 x i8> @cttz_zero_undef_nxv1i8(<vscale x 1 x i8> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv1i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv1i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
-; RV64I-NEXT:    vsub.vx v9, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: cttz_zero_undef_nxv1i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
-; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
-; RV32D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32D-NEXT:    vzext.vf4 v9, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v9
-; RV32D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v8
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv1i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
-; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
-; RV64D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vzext.vf4 v9, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v9
-; RV64D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v8
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv1i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: cttz_zero_undef_nxv1i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-D-NEXT:    vand.vv v8, v8, v9
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v9, v8
+; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v9
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 true)
   ret <vscale x 1 x i8> %a
 }
 
 define <vscale x 2 x i8> @cttz_zero_undef_nxv2i8(<vscale x 2 x i8> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv2i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv2i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
-; RV64I-NEXT:    vsub.vx v9, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: cttz_zero_undef_nxv2i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
-; RV32D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV32D-NEXT:    vzext.vf4 v9, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v9
-; RV32D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v8
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv2i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
-; RV64D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vzext.vf4 v9, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v9
-; RV64D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v8
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv2i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: cttz_zero_undef_nxv2i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-D-NEXT:    vand.vv v8, v8, v9
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v9, v8
+; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v9
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 true)
   ret <vscale x 2 x i8> %a
 }
 
 define <vscale x 4 x i8> @cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv4i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv4i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
-; RV64I-NEXT:    vsub.vx v9, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: cttz_zero_undef_nxv4i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
-; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
-; RV32D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32D-NEXT:    vzext.vf4 v10, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v10
-; RV32D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v10, v8, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v10
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv4i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
-; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
-; RV64D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vzext.vf4 v10, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v10
-; RV64D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v10, v8, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v10
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv4i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: cttz_zero_undef_nxv4i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-D-NEXT:    vand.vv v8, v8, v9
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v10, v8
+; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v10
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v10, v8, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v8, v10
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 true)
   ret <vscale x 4 x i8> %a
 }
 
 define <vscale x 8 x i8> @cttz_zero_undef_nxv8i8(<vscale x 8 x i8> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv8i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v9, v9, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v9, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v9, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv8i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
-; RV64I-NEXT:    vsub.vx v9, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v9, v9, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v9, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: cttz_zero_undef_nxv8i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
-; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
-; RV32D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32D-NEXT:    vzext.vf4 v12, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v12
-; RV32D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v12, v8, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v12
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv8i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
-; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
-; RV64D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vzext.vf4 v12, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v12
-; RV64D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v12, v8, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v12
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv8i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: cttz_zero_undef_nxv8i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-D-NEXT:    vand.vv v8, v8, v9
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v12, v8
+; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v12
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v12, v8, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v8, v12
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 true)
   ret <vscale x 8 x i8> %a
 }
 
 define <vscale x 16 x i8> @cttz_zero_undef_nxv16i8(<vscale x 16 x i8> %va) {
-; RV32I-LABEL: cttz_zero_undef_nxv16i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
-; RV32I-NEXT:    vsub.vx v10, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    li a0, 85
-; RV32I-NEXT:    vand.vx v10, v10, a0
-; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    li a0, 51
-; RV32I-NEXT:    vand.vx v10, v8, a0
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vx v8, v8, a0
-; RV32I-NEXT:    vadd.vv v8, v10, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v10
-; RV32I-NEXT:    vand.vi v8, v8, 15
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: cttz_zero_undef_nxv16i8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, 1
-; RV64I-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
-; RV64I-NEXT:    vsub.vx v10, v8, a0
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    li a0, 85
-; RV64I-NEXT:    vand.vx v10, v10, a0
-; RV64I-NEXT:    vsub.vv v8, v8, v10
-; RV64I-NEXT:    li a0, 51
-; RV64I-NEXT:    vand.vx v10, v8, a0
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a0
-; RV64I-NEXT:    vadd.vv v8, v10, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v10
-; RV64I-NEXT:    vand.vi v8, v8, 15
-; RV64I-NEXT:    ret
-;
-; RV32D-LABEL: cttz_zero_undef_nxv16i8:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
-; RV32D-NEXT:    vrsub.vi v10, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v10
-; RV32D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; RV32D-NEXT:    vzext.vf4 v16, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v16
-; RV32D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v16, v8, 23
-; RV32D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v16
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv16i8:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
-; RV64D-NEXT:    vrsub.vi v10, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v10
-; RV64D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; RV64D-NEXT:    vzext.vf4 v16, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v16
-; RV64D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v16, v8, 23
-; RV64D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v16
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv16i8:
+; CHECK-ZVE64X:       # %bb.0:
+; CHECK-ZVE64X-NEXT:    li a0, 1
+; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
+; CHECK-ZVE64X-NEXT:    vsub.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
+; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-ZVE64X-NEXT:    li a0, 85
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v10, a0
+; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    li a0, 51
+; CHECK-ZVE64X-NEXT:    vand.vx v10, v8, a0
+; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
+; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v10
+; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
+; CHECK-ZVE64X-NEXT:    ret
+;
+; CHECK-D-LABEL: cttz_zero_undef_nxv16i8:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v10, v8, 0
+; CHECK-D-NEXT:    vand.vv v8, v8, v10
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-D-NEXT:    vzext.vf4 v16, v8
+; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v16
+; CHECK-D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
+; CHECK-D-NEXT:    vnsrl.wi v16, v8, 23
+; CHECK-D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v8, v16
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 true)
   ret <vscale x 16 x i8> %a
 }
@@ -2269,27 +1747,16 @@ define <vscale x 1 x i16> @cttz_zero_undef_nxv1i16(<vscale x 1 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: cttz_zero_undef_nxv1i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
-; RV32D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT:    vnsrl.wi v8, v9, 23
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv1i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT:    vnsrl.wi v8, v9, 23
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: cttz_zero_undef_nxv1i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-D-NEXT:    vand.vv v8, v8, v9
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT:    vnsrl.wi v8, v9, 23
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 true)
   ret <vscale x 1 x i16> %a
 }
@@ -2351,27 +1818,16 @@ define <vscale x 2 x i16> @cttz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: cttz_zero_undef_nxv2i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
-; RV32D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT:    vnsrl.wi v8, v9, 23
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv2i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT:    vnsrl.wi v8, v9, 23
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: cttz_zero_undef_nxv2i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-D-NEXT:    vand.vv v8, v8, v9
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT:    vnsrl.wi v8, v9, 23
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 true)
   ret <vscale x 2 x i16> %a
 }
@@ -2433,27 +1889,16 @@ define <vscale x 4 x i16> @cttz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: cttz_zero_undef_nxv4i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
-; RV32D-NEXT:    vfwcvt.f.xu.v v10, v8
-; RV32D-NEXT:    vnsrl.wi v8, v10, 23
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv4i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v10, v8
-; RV64D-NEXT:    vnsrl.wi v8, v10, 23
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: cttz_zero_undef_nxv4i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-D-NEXT:    vand.vv v8, v8, v9
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v8
+; CHECK-D-NEXT:    vnsrl.wi v8, v10, 23
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 true)
   ret <vscale x 4 x i16> %a
 }
@@ -2515,27 +1960,16 @@ define <vscale x 8 x i16> @cttz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: cttz_zero_undef_nxv8i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vrsub.vi v10, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v10
-; RV32D-NEXT:    vfwcvt.f.xu.v v12, v8
-; RV32D-NEXT:    vnsrl.wi v8, v12, 23
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv8i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vrsub.vi v10, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v10
-; RV64D-NEXT:    vfwcvt.f.xu.v v12, v8
-; RV64D-NEXT:    vnsrl.wi v8, v12, 23
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: cttz_zero_undef_nxv8i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v10, v8, 0
+; CHECK-D-NEXT:    vand.vv v8, v8, v10
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v12, v8
+; CHECK-D-NEXT:    vnsrl.wi v8, v12, 23
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 true)
   ret <vscale x 8 x i16> %a
 }
@@ -2597,27 +2031,16 @@ define <vscale x 16 x i16> @cttz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 8
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: cttz_zero_undef_nxv16i16:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vrsub.vi v12, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v12
-; RV32D-NEXT:    vfwcvt.f.xu.v v16, v8
-; RV32D-NEXT:    vnsrl.wi v8, v16, 23
-; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv16i16:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vrsub.vi v12, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v12
-; RV64D-NEXT:    vfwcvt.f.xu.v v16, v8
-; RV64D-NEXT:    vnsrl.wi v8, v16, 23
-; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: cttz_zero_undef_nxv16i16:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v12, v8, 0
+; CHECK-D-NEXT:    vand.vv v8, v8, v12
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v16, v8
+; CHECK-D-NEXT:    vnsrl.wi v8, v16, 23
+; CHECK-D-NEXT:    li a0, 127
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 true)
   ret <vscale x 16 x i16> %a
 }
@@ -2741,35 +2164,20 @@ define <vscale x 1 x i32> @cttz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: cttz_zero_undef_nxv1i32:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
-; RV32D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT:    li a0, 52
-; RV32D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV32D-NEXT:    vsrl.vx v8, v9, a0
-; RV32D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v8
-; RV32D-NEXT:    li a0, 1023
-; RV32D-NEXT:    vsub.vx v8, v8, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv1i32:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT:    li a0, 52
-; RV64D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV64D-NEXT:    vsrl.vx v8, v9, a0
-; RV64D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v8
-; RV64D-NEXT:    li a0, 1023
-; RV64D-NEXT:    vsub.vx v8, v8, a0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: cttz_zero_undef_nxv1i32:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-D-NEXT:    vand.vv v8, v8, v9
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
+; CHECK-D-NEXT:    li a0, 52
+; CHECK-D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; CHECK-D-NEXT:    vsrl.vx v8, v9, a0
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-D-NEXT:    li a0, 1023
+; CHECK-D-NEXT:    vsub.vx v8, v8, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 true)
   ret <vscale x 1 x i32> %a
 }
@@ -2833,35 +2241,20 @@ define <vscale x 2 x i32> @cttz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: cttz_zero_undef_nxv2i32:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
-; RV32D-NEXT:    vfwcvt.f.xu.v v10, v8
-; RV32D-NEXT:    li a0, 52
-; RV32D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV32D-NEXT:    vsrl.vx v8, v10, a0
-; RV32D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v10, v8
-; RV32D-NEXT:    li a0, 1023
-; RV32D-NEXT:    vsub.vx v8, v10, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv2i32:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v10, v8
-; RV64D-NEXT:    li a0, 52
-; RV64D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64D-NEXT:    vsrl.vx v8, v10, a0
-; RV64D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v10, v8
-; RV64D-NEXT:    li a0, 1023
-; RV64D-NEXT:    vsub.vx v8, v10, a0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: cttz_zero_undef_nxv2i32:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-D-NEXT:    vand.vv v8, v8, v9
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v8
+; CHECK-D-NEXT:    li a0, 52
+; CHECK-D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-D-NEXT:    vsrl.vx v8, v10, a0
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v10, v8
+; CHECK-D-NEXT:    li a0, 1023
+; CHECK-D-NEXT:    vsub.vx v8, v10, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 true)
   ret <vscale x 2 x i32> %a
 }
@@ -2925,35 +2318,20 @@ define <vscale x 4 x i32> @cttz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: cttz_zero_undef_nxv4i32:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; RV32D-NEXT:    vrsub.vi v10, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v10
-; RV32D-NEXT:    vfwcvt.f.xu.v v12, v8
-; RV32D-NEXT:    li a0, 52
-; RV32D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32D-NEXT:    vsrl.vx v8, v12, a0
-; RV32D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v12, v8
-; RV32D-NEXT:    li a0, 1023
-; RV32D-NEXT:    vsub.vx v8, v12, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv4i32:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vrsub.vi v10, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v10
-; RV64D-NEXT:    vfwcvt.f.xu.v v12, v8
-; RV64D-NEXT:    li a0, 52
-; RV64D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64D-NEXT:    vsrl.vx v8, v12, a0
-; RV64D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v12, v8
-; RV64D-NEXT:    li a0, 1023
-; RV64D-NEXT:    vsub.vx v8, v12, a0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: cttz_zero_undef_nxv4i32:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v10, v8, 0
+; CHECK-D-NEXT:    vand.vv v8, v8, v10
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v12, v8
+; CHECK-D-NEXT:    li a0, 52
+; CHECK-D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
+; CHECK-D-NEXT:    vsrl.vx v8, v12, a0
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v12, v8
+; CHECK-D-NEXT:    li a0, 1023
+; CHECK-D-NEXT:    vsub.vx v8, v12, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 true)
   ret <vscale x 4 x i32> %a
 }
@@ -3017,35 +2395,20 @@ define <vscale x 8 x i32> @cttz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
 ; RV64I-NEXT:    vsrl.vi v8, v8, 24
 ; RV64I-NEXT:    ret
 ;
-; RV32D-LABEL: cttz_zero_undef_nxv8i32:
-; RV32D:       # %bb.0:
-; RV32D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; RV32D-NEXT:    vrsub.vi v12, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v12
-; RV32D-NEXT:    vfwcvt.f.xu.v v16, v8
-; RV32D-NEXT:    li a0, 52
-; RV32D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV32D-NEXT:    vsrl.vx v8, v16, a0
-; RV32D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v16, v8
-; RV32D-NEXT:    li a0, 1023
-; RV32D-NEXT:    vsub.vx v8, v16, a0
-; RV32D-NEXT:    ret
-;
-; RV64D-LABEL: cttz_zero_undef_nxv8i32:
-; RV64D:       # %bb.0:
-; RV64D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vrsub.vi v12, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v12
-; RV64D-NEXT:    vfwcvt.f.xu.v v16, v8
-; RV64D-NEXT:    li a0, 52
-; RV64D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64D-NEXT:    vsrl.vx v8, v16, a0
-; RV64D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v16, v8
-; RV64D-NEXT:    li a0, 1023
-; RV64D-NEXT:    vsub.vx v8, v16, a0
-; RV64D-NEXT:    ret
+; CHECK-D-LABEL: cttz_zero_undef_nxv8i32:
+; CHECK-D:       # %bb.0:
+; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-D-NEXT:    vrsub.vi v12, v8, 0
+; CHECK-D-NEXT:    vand.vv v8, v8, v12
+; CHECK-D-NEXT:    vfwcvt.f.xu.v v16, v8
+; CHECK-D-NEXT:    li a0, 52
+; CHECK-D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
+; CHECK-D-NEXT:    vsrl.vx v8, v16, a0
+; CHECK-D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-D-NEXT:    vncvt.x.x.w v16, v8
+; CHECK-D-NEXT:    li a0, 1023
+; CHECK-D-NEXT:    vsub.vx v8, v16, a0
+; CHECK-D-NEXT:    ret
   %a = call <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 true)
   ret <vscale x 8 x i32> %a
 }

diff  --git a/llvm/test/CodeGen/RISCV/ssub_sat.ll b/llvm/test/CodeGen/RISCV/ssub_sat.ll
index b250773b30978..11293346da5a1 100644
--- a/llvm/test/CodeGen/RISCV/ssub_sat.ll
+++ b/llvm/test/CodeGen/RISCV/ssub_sat.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefix=RV32I
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefix=RV64I
-; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV32IZbb,RV32IZbbNOZbt
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV64IZbb,RV64IZbbNOZbt
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefixes=RV32NoZbt,RV32I
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefixes=RV64NoZbt,RV64I
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV32NoZbt,RV32IZbb
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV64NoZbt,RV64IZbb
 ; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb,+experimental-zbt | FileCheck %s --check-prefixes=RV32IZbb,RV32IZbbZbt
 ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb,+experimental-zbt | FileCheck %s --check-prefixes=RV64IZbb,RV64IZbbZbt
 
@@ -13,19 +13,19 @@ declare i32 @llvm.ssub.sat.i32(i32, i32)
 declare i64 @llvm.ssub.sat.i64(i64, i64)
 
 define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
-; RV32I-LABEL: func:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    sgtz a3, a1
-; RV32I-NEXT:    sub a0, a0, a1
-; RV32I-NEXT:    slt a1, a0, a2
-; RV32I-NEXT:    beq a3, a1, .LBB0_2
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    srai a0, a0, 31
-; RV32I-NEXT:    lui a1, 524288
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:  .LBB0_2:
-; RV32I-NEXT:    ret
+; RV32NoZbt-LABEL: func:
+; RV32NoZbt:       # %bb.0:
+; RV32NoZbt-NEXT:    mv a2, a0
+; RV32NoZbt-NEXT:    sgtz a3, a1
+; RV32NoZbt-NEXT:    sub a0, a0, a1
+; RV32NoZbt-NEXT:    slt a1, a0, a2
+; RV32NoZbt-NEXT:    beq a3, a1, .LBB0_2
+; RV32NoZbt-NEXT:  # %bb.1:
+; RV32NoZbt-NEXT:    srai a0, a0, 31
+; RV32NoZbt-NEXT:    lui a1, 524288
+; RV32NoZbt-NEXT:    xor a0, a0, a1
+; RV32NoZbt-NEXT:  .LBB0_2:
+; RV32NoZbt-NEXT:    ret
 ;
 ; RV64I-LABEL: func:
 ; RV64I:       # %bb.0:
@@ -44,20 +44,6 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
 ; RV64I-NEXT:    lui a0, 524288
 ; RV64I-NEXT:    ret
 ;
-; RV32IZbbNOZbt-LABEL: func:
-; RV32IZbbNOZbt:       # %bb.0:
-; RV32IZbbNOZbt-NEXT:    mv a2, a0
-; RV32IZbbNOZbt-NEXT:    sgtz a3, a1
-; RV32IZbbNOZbt-NEXT:    sub a0, a0, a1
-; RV32IZbbNOZbt-NEXT:    slt a1, a0, a2
-; RV32IZbbNOZbt-NEXT:    beq a3, a1, .LBB0_2
-; RV32IZbbNOZbt-NEXT:  # %bb.1:
-; RV32IZbbNOZbt-NEXT:    srai a0, a0, 31
-; RV32IZbbNOZbt-NEXT:    lui a1, 524288
-; RV32IZbbNOZbt-NEXT:    xor a0, a0, a1
-; RV32IZbbNOZbt-NEXT:  .LBB0_2:
-; RV32IZbbNOZbt-NEXT:    ret
-;
 ; RV64IZbb-LABEL: func:
 ; RV64IZbb:       # %bb.0:
 ; RV64IZbb-NEXT:    sub a0, a0, a1
@@ -83,73 +69,39 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
 }
 
 define i64 @func2(i64 %x, i64 %y) nounwind {
-; RV32I-LABEL: func2:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    mv a4, a1
-; RV32I-NEXT:    sltu a1, a0, a2
-; RV32I-NEXT:    sub a5, a4, a3
-; RV32I-NEXT:    sub a1, a5, a1
-; RV32I-NEXT:    xor a5, a4, a1
-; RV32I-NEXT:    xor a3, a4, a3
-; RV32I-NEXT:    and a3, a3, a5
-; RV32I-NEXT:    bltz a3, .LBB1_2
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sub a0, a0, a2
-; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB1_2:
-; RV32I-NEXT:    srai a0, a1, 31
-; RV32I-NEXT:    lui a1, 524288
-; RV32I-NEXT:    xor a1, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: func2:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    sgtz a3, a1
-; RV64I-NEXT:    sub a0, a0, a1
-; RV64I-NEXT:    slt a1, a0, a2
-; RV64I-NEXT:    beq a3, a1, .LBB1_2
-; RV64I-NEXT:  # %bb.1:
-; RV64I-NEXT:    srai a0, a0, 63
-; RV64I-NEXT:    li a1, -1
-; RV64I-NEXT:    slli a1, a1, 63
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:  .LBB1_2:
-; RV64I-NEXT:    ret
-;
-; RV32IZbbNOZbt-LABEL: func2:
-; RV32IZbbNOZbt:       # %bb.0:
-; RV32IZbbNOZbt-NEXT:    mv a4, a1
-; RV32IZbbNOZbt-NEXT:    sltu a1, a0, a2
-; RV32IZbbNOZbt-NEXT:    sub a5, a4, a3
-; RV32IZbbNOZbt-NEXT:    sub a1, a5, a1
-; RV32IZbbNOZbt-NEXT:    xor a5, a4, a1
-; RV32IZbbNOZbt-NEXT:    xor a3, a4, a3
-; RV32IZbbNOZbt-NEXT:    and a3, a3, a5
-; RV32IZbbNOZbt-NEXT:    bltz a3, .LBB1_2
-; RV32IZbbNOZbt-NEXT:  # %bb.1:
-; RV32IZbbNOZbt-NEXT:    sub a0, a0, a2
-; RV32IZbbNOZbt-NEXT:    ret
-; RV32IZbbNOZbt-NEXT:  .LBB1_2:
-; RV32IZbbNOZbt-NEXT:    srai a0, a1, 31
-; RV32IZbbNOZbt-NEXT:    lui a1, 524288
-; RV32IZbbNOZbt-NEXT:    xor a1, a0, a1
-; RV32IZbbNOZbt-NEXT:    ret
+; RV32NoZbt-LABEL: func2:
+; RV32NoZbt:       # %bb.0:
+; RV32NoZbt-NEXT:    mv a4, a1
+; RV32NoZbt-NEXT:    sltu a1, a0, a2
+; RV32NoZbt-NEXT:    sub a5, a4, a3
+; RV32NoZbt-NEXT:    sub a1, a5, a1
+; RV32NoZbt-NEXT:    xor a5, a4, a1
+; RV32NoZbt-NEXT:    xor a3, a4, a3
+; RV32NoZbt-NEXT:    and a3, a3, a5
+; RV32NoZbt-NEXT:    bltz a3, .LBB1_2
+; RV32NoZbt-NEXT:  # %bb.1:
+; RV32NoZbt-NEXT:    sub a0, a0, a2
+; RV32NoZbt-NEXT:    ret
+; RV32NoZbt-NEXT:  .LBB1_2:
+; RV32NoZbt-NEXT:    srai a0, a1, 31
+; RV32NoZbt-NEXT:    lui a1, 524288
+; RV32NoZbt-NEXT:    xor a1, a0, a1
+; RV32NoZbt-NEXT:    ret
 ;
-; RV64IZbbNOZbt-LABEL: func2:
-; RV64IZbbNOZbt:       # %bb.0:
-; RV64IZbbNOZbt-NEXT:    mv a2, a0
-; RV64IZbbNOZbt-NEXT:    sgtz a3, a1
-; RV64IZbbNOZbt-NEXT:    sub a0, a0, a1
-; RV64IZbbNOZbt-NEXT:    slt a1, a0, a2
-; RV64IZbbNOZbt-NEXT:    beq a3, a1, .LBB1_2
-; RV64IZbbNOZbt-NEXT:  # %bb.1:
-; RV64IZbbNOZbt-NEXT:    srai a0, a0, 63
-; RV64IZbbNOZbt-NEXT:    li a1, -1
-; RV64IZbbNOZbt-NEXT:    slli a1, a1, 63
-; RV64IZbbNOZbt-NEXT:    xor a0, a0, a1
-; RV64IZbbNOZbt-NEXT:  .LBB1_2:
-; RV64IZbbNOZbt-NEXT:    ret
+; RV64NoZbt-LABEL: func2:
+; RV64NoZbt:       # %bb.0:
+; RV64NoZbt-NEXT:    mv a2, a0
+; RV64NoZbt-NEXT:    sgtz a3, a1
+; RV64NoZbt-NEXT:    sub a0, a0, a1
+; RV64NoZbt-NEXT:    slt a1, a0, a2
+; RV64NoZbt-NEXT:    beq a3, a1, .LBB1_2
+; RV64NoZbt-NEXT:  # %bb.1:
+; RV64NoZbt-NEXT:    srai a0, a0, 63
+; RV64NoZbt-NEXT:    li a1, -1
+; RV64NoZbt-NEXT:    slli a1, a1, 63
+; RV64NoZbt-NEXT:    xor a0, a0, a1
+; RV64NoZbt-NEXT:  .LBB1_2:
+; RV64NoZbt-NEXT:    ret
 ;
 ; RV32IZbbZbt-LABEL: func2:
 ; RV32IZbbZbt:       # %bb.0:

diff  --git a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll
index a7c366ce1679f..fce397e447607 100644
--- a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll
+++ b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefix=RV32I
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefix=RV64I
-; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV32IZbb,RV32IZbbNOZbt
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV64IZbb,RV64IZbbNOZbt
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefixes=RV32NoZbt,RV32I
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefixes=RV64NoZbt,RV64I
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV32NoZbt,RV32IZbb
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb | FileCheck %s --check-prefixes=RV64NoZbt,RV64IZbb
 ; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb,+experimental-zbt | FileCheck %s --check-prefixes=RV32IZbb,RV32IZbbZbt
 ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb,+experimental-zbt | FileCheck %s --check-prefixes=RV64IZbb,RV64IZbbZbt
 
@@ -13,20 +13,20 @@ declare i32 @llvm.ssub.sat.i32(i32, i32)
 declare i64 @llvm.ssub.sat.i64(i64, i64)
 
 define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
-; RV32I-LABEL: func32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    mv a3, a0
-; RV32I-NEXT:    mul a0, a1, a2
-; RV32I-NEXT:    sgtz a1, a0
-; RV32I-NEXT:    sub a0, a3, a0
-; RV32I-NEXT:    slt a2, a0, a3
-; RV32I-NEXT:    beq a1, a2, .LBB0_2
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    srai a0, a0, 31
-; RV32I-NEXT:    lui a1, 524288
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:  .LBB0_2:
-; RV32I-NEXT:    ret
+; RV32NoZbt-LABEL: func32:
+; RV32NoZbt:       # %bb.0:
+; RV32NoZbt-NEXT:    mv a3, a0
+; RV32NoZbt-NEXT:    mul a0, a1, a2
+; RV32NoZbt-NEXT:    sgtz a1, a0
+; RV32NoZbt-NEXT:    sub a0, a3, a0
+; RV32NoZbt-NEXT:    slt a2, a0, a3
+; RV32NoZbt-NEXT:    beq a1, a2, .LBB0_2
+; RV32NoZbt-NEXT:  # %bb.1:
+; RV32NoZbt-NEXT:    srai a0, a0, 31
+; RV32NoZbt-NEXT:    lui a1, 524288
+; RV32NoZbt-NEXT:    xor a0, a0, a1
+; RV32NoZbt-NEXT:  .LBB0_2:
+; RV32NoZbt-NEXT:    ret
 ;
 ; RV64I-LABEL: func32:
 ; RV64I:       # %bb.0:
@@ -47,21 +47,6 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
 ; RV64I-NEXT:    lui a0, 524288
 ; RV64I-NEXT:    ret
 ;
-; RV32IZbbNOZbt-LABEL: func32:
-; RV32IZbbNOZbt:       # %bb.0:
-; RV32IZbbNOZbt-NEXT:    mv a3, a0
-; RV32IZbbNOZbt-NEXT:    mul a0, a1, a2
-; RV32IZbbNOZbt-NEXT:    sgtz a1, a0
-; RV32IZbbNOZbt-NEXT:    sub a0, a3, a0
-; RV32IZbbNOZbt-NEXT:    slt a2, a0, a3
-; RV32IZbbNOZbt-NEXT:    beq a1, a2, .LBB0_2
-; RV32IZbbNOZbt-NEXT:  # %bb.1:
-; RV32IZbbNOZbt-NEXT:    srai a0, a0, 31
-; RV32IZbbNOZbt-NEXT:    lui a1, 524288
-; RV32IZbbNOZbt-NEXT:    xor a0, a0, a1
-; RV32IZbbNOZbt-NEXT:  .LBB0_2:
-; RV32IZbbNOZbt-NEXT:    ret
-;
 ; RV64IZbb-LABEL: func32:
 ; RV64IZbb:       # %bb.0:
 ; RV64IZbb-NEXT:    sext.w a0, a0
@@ -91,73 +76,39 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
 }
 
 define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
-; RV32I-LABEL: func64:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    mv a2, a1
-; RV32I-NEXT:    sltu a1, a0, a4
-; RV32I-NEXT:    sub a3, a2, a5
-; RV32I-NEXT:    sub a1, a3, a1
-; RV32I-NEXT:    xor a3, a2, a1
-; RV32I-NEXT:    xor a2, a2, a5
-; RV32I-NEXT:    and a2, a2, a3
-; RV32I-NEXT:    bltz a2, .LBB1_2
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sub a0, a0, a4
-; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB1_2:
-; RV32I-NEXT:    srai a0, a1, 31
-; RV32I-NEXT:    lui a1, 524288
-; RV32I-NEXT:    xor a1, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: func64:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    mv a1, a0
-; RV64I-NEXT:    sgtz a3, a2
-; RV64I-NEXT:    sub a0, a0, a2
-; RV64I-NEXT:    slt a1, a0, a1
-; RV64I-NEXT:    beq a3, a1, .LBB1_2
-; RV64I-NEXT:  # %bb.1:
-; RV64I-NEXT:    srai a0, a0, 63
-; RV64I-NEXT:    li a1, -1
-; RV64I-NEXT:    slli a1, a1, 63
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:  .LBB1_2:
-; RV64I-NEXT:    ret
-;
-; RV32IZbbNOZbt-LABEL: func64:
-; RV32IZbbNOZbt:       # %bb.0:
-; RV32IZbbNOZbt-NEXT:    mv a2, a1
-; RV32IZbbNOZbt-NEXT:    sltu a1, a0, a4
-; RV32IZbbNOZbt-NEXT:    sub a3, a2, a5
-; RV32IZbbNOZbt-NEXT:    sub a1, a3, a1
-; RV32IZbbNOZbt-NEXT:    xor a3, a2, a1
-; RV32IZbbNOZbt-NEXT:    xor a2, a2, a5
-; RV32IZbbNOZbt-NEXT:    and a2, a2, a3
-; RV32IZbbNOZbt-NEXT:    bltz a2, .LBB1_2
-; RV32IZbbNOZbt-NEXT:  # %bb.1:
-; RV32IZbbNOZbt-NEXT:    sub a0, a0, a4
-; RV32IZbbNOZbt-NEXT:    ret
-; RV32IZbbNOZbt-NEXT:  .LBB1_2:
-; RV32IZbbNOZbt-NEXT:    srai a0, a1, 31
-; RV32IZbbNOZbt-NEXT:    lui a1, 524288
-; RV32IZbbNOZbt-NEXT:    xor a1, a0, a1
-; RV32IZbbNOZbt-NEXT:    ret
+; RV32NoZbt-LABEL: func64:
+; RV32NoZbt:       # %bb.0:
+; RV32NoZbt-NEXT:    mv a2, a1
+; RV32NoZbt-NEXT:    sltu a1, a0, a4
+; RV32NoZbt-NEXT:    sub a3, a2, a5
+; RV32NoZbt-NEXT:    sub a1, a3, a1
+; RV32NoZbt-NEXT:    xor a3, a2, a1
+; RV32NoZbt-NEXT:    xor a2, a2, a5
+; RV32NoZbt-NEXT:    and a2, a2, a3
+; RV32NoZbt-NEXT:    bltz a2, .LBB1_2
+; RV32NoZbt-NEXT:  # %bb.1:
+; RV32NoZbt-NEXT:    sub a0, a0, a4
+; RV32NoZbt-NEXT:    ret
+; RV32NoZbt-NEXT:  .LBB1_2:
+; RV32NoZbt-NEXT:    srai a0, a1, 31
+; RV32NoZbt-NEXT:    lui a1, 524288
+; RV32NoZbt-NEXT:    xor a1, a0, a1
+; RV32NoZbt-NEXT:    ret
 ;
-; RV64IZbbNOZbt-LABEL: func64:
-; RV64IZbbNOZbt:       # %bb.0:
-; RV64IZbbNOZbt-NEXT:    mv a1, a0
-; RV64IZbbNOZbt-NEXT:    sgtz a3, a2
-; RV64IZbbNOZbt-NEXT:    sub a0, a0, a2
-; RV64IZbbNOZbt-NEXT:    slt a1, a0, a1
-; RV64IZbbNOZbt-NEXT:    beq a3, a1, .LBB1_2
-; RV64IZbbNOZbt-NEXT:  # %bb.1:
-; RV64IZbbNOZbt-NEXT:    srai a0, a0, 63
-; RV64IZbbNOZbt-NEXT:    li a1, -1
-; RV64IZbbNOZbt-NEXT:    slli a1, a1, 63
-; RV64IZbbNOZbt-NEXT:    xor a0, a0, a1
-; RV64IZbbNOZbt-NEXT:  .LBB1_2:
-; RV64IZbbNOZbt-NEXT:    ret
+; RV64NoZbt-LABEL: func64:
+; RV64NoZbt:       # %bb.0:
+; RV64NoZbt-NEXT:    mv a1, a0
+; RV64NoZbt-NEXT:    sgtz a3, a2
+; RV64NoZbt-NEXT:    sub a0, a0, a2
+; RV64NoZbt-NEXT:    slt a1, a0, a1
+; RV64NoZbt-NEXT:    beq a3, a1, .LBB1_2
+; RV64NoZbt-NEXT:  # %bb.1:
+; RV64NoZbt-NEXT:    srai a0, a0, 63
+; RV64NoZbt-NEXT:    li a1, -1
+; RV64NoZbt-NEXT:    slli a1, a1, 63
+; RV64NoZbt-NEXT:    xor a0, a0, a1
+; RV64NoZbt-NEXT:  .LBB1_2:
+; RV64NoZbt-NEXT:    ret
 ;
 ; RV32IZbbZbt-LABEL: func64:
 ; RV32IZbbZbt:       # %bb.0:

diff  --git a/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll b/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll
index 9ef166e585636..645de97304a2d 100644
--- a/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll
+++ b/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll
@@ -1,46 +1,31 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32,RV32I
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-I,RV32,RV32I
 ; RUN: llc -mtriple=riscv64 < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64,RV64I
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-I,RV64,RV64I
 ; RUN: llc -mtriple=riscv32 -mattr=+zbb < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZBB
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-ZBB,RV32,RV32ZBB
 ; RUN: llc -mtriple=riscv64 -mattr=+zbb < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZBB
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-ZBB,RV64,RV64ZBB
 
 ; TODO: Should we convert these to X ^ ((X ^ Y) & M) form when Zbb isn't
 ; present?
 
 define i8 @out8(i8 %x, i8 %y, i8 %mask) {
-; RV32I-LABEL: out8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    and a0, a0, a2
-; RV32I-NEXT:    not a2, a2
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    or a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: out8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    not a2, a2
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: out8:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    and a0, a0, a2
-; RV32ZBB-NEXT:    andn a1, a1, a2
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: out8:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    andn a1, a1, a2
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: out8:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    and a0, a0, a2
+; CHECK-I-NEXT:    not a2, a2
+; CHECK-I-NEXT:    and a1, a1, a2
+; CHECK-I-NEXT:    or a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: out8:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    and a0, a0, a2
+; CHECK-ZBB-NEXT:    andn a1, a1, a2
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %mx = and i8 %x, %mask
   %notmask = xor i8 %mask, -1
   %my = and i8 %y, %notmask
@@ -49,35 +34,20 @@ define i8 @out8(i8 %x, i8 %y, i8 %mask) {
 }
 
 define i16 @out16(i16 %x, i16 %y, i16 %mask) {
-; RV32I-LABEL: out16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    and a0, a0, a2
-; RV32I-NEXT:    not a2, a2
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    or a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: out16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    not a2, a2
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: out16:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    and a0, a0, a2
-; RV32ZBB-NEXT:    andn a1, a1, a2
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: out16:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    andn a1, a1, a2
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: out16:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    and a0, a0, a2
+; CHECK-I-NEXT:    not a2, a2
+; CHECK-I-NEXT:    and a1, a1, a2
+; CHECK-I-NEXT:    or a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: out16:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    and a0, a0, a2
+; CHECK-ZBB-NEXT:    andn a1, a1, a2
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %mx = and i16 %x, %mask
   %notmask = xor i16 %mask, -1
   %my = and i16 %y, %notmask
@@ -86,35 +56,20 @@ define i16 @out16(i16 %x, i16 %y, i16 %mask) {
 }
 
 define i32 @out32(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: out32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    and a0, a0, a2
-; RV32I-NEXT:    not a2, a2
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    or a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: out32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    not a2, a2
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: out32:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    and a0, a0, a2
-; RV32ZBB-NEXT:    andn a1, a1, a2
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: out32:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    andn a1, a1, a2
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: out32:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    and a0, a0, a2
+; CHECK-I-NEXT:    not a2, a2
+; CHECK-I-NEXT:    and a1, a1, a2
+; CHECK-I-NEXT:    or a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: out32:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    and a0, a0, a2
+; CHECK-ZBB-NEXT:    andn a1, a1, a2
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %mx = and i32 %x, %mask
   %notmask = xor i32 %mask, -1
   %my = and i32 %y, %notmask
@@ -172,33 +127,19 @@ define i64 @out64(i64 %x, i64 %y, i64 %mask) {
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 define i8 @in8(i8 %x, i8 %y, i8 %mask) {
-; RV32I-LABEL: in8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    and a0, a0, a2
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in8:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in8:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a1, a1, a2
-; RV32ZBB-NEXT:    and a0, a0, a2
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in8:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a1, a1, a2
-; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in8:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    and a0, a0, a2
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in8:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    andn a1, a1, a2
+; CHECK-ZBB-NEXT:    and a0, a0, a2
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %n0 = xor i8 %x, %y
   %n1 = and i8 %n0, %mask
   %r = xor i8 %n1, %y
@@ -206,33 +147,19 @@ define i8 @in8(i8 %x, i8 %y, i8 %mask) {
 }
 
 define i16 @in16(i16 %x, i16 %y, i16 %mask) {
-; RV32I-LABEL: in16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    and a0, a0, a2
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in16:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in16:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a1, a1, a2
-; RV32ZBB-NEXT:    and a0, a0, a2
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in16:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a1, a1, a2
-; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in16:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    and a0, a0, a2
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in16:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    andn a1, a1, a2
+; CHECK-ZBB-NEXT:    and a0, a0, a2
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %n0 = xor i16 %x, %y
   %n1 = and i16 %n0, %mask
   %r = xor i16 %n1, %y
@@ -240,33 +167,19 @@ define i16 @in16(i16 %x, i16 %y, i16 %mask) {
 }
 
 define i32 @in32(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: in32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    and a0, a0, a2
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in32:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a1, a1, a2
-; RV32ZBB-NEXT:    and a0, a0, a2
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in32:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a1, a1, a2
-; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in32:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    and a0, a0, a2
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in32:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    andn a1, a1, a2
+; CHECK-ZBB-NEXT:    and a0, a0, a2
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, %mask
   %r = xor i32 %n1, %y
@@ -318,33 +231,19 @@ define i64 @in64(i64 %x, i64 %y, i64 %mask) {
 ; ============================================================================ ;
 
 define i32 @in_commutativity_0_0_1(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: in_commutativity_0_0_1:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    and a0, a2, a0
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_commutativity_0_0_1:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    and a0, a2, a0
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_commutativity_0_0_1:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a1, a1, a2
-; RV32ZBB-NEXT:    and a0, a0, a2
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_commutativity_0_0_1:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a1, a1, a2
-; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_commutativity_0_0_1:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    and a0, a2, a0
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_commutativity_0_0_1:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    andn a1, a1, a2
+; CHECK-ZBB-NEXT:    and a0, a0, a2
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %n0 = xor i32 %x, %y
   %n1 = and i32 %mask, %n0 ; swapped
   %r = xor i32 %n1, %y
@@ -352,33 +251,19 @@ define i32 @in_commutativity_0_0_1(i32 %x, i32 %y, i32 %mask) {
 }
 
 define i32 @in_commutativity_0_1_0(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: in_commutativity_0_1_0:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    and a0, a0, a2
-; RV32I-NEXT:    xor a0, a1, a0
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_commutativity_0_1_0:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    xor a0, a1, a0
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_commutativity_0_1_0:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a1, a1, a2
-; RV32ZBB-NEXT:    and a0, a0, a2
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_commutativity_0_1_0:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a1, a1, a2
-; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_commutativity_0_1_0:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    and a0, a0, a2
+; CHECK-I-NEXT:    xor a0, a1, a0
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_commutativity_0_1_0:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    andn a1, a1, a2
+; CHECK-ZBB-NEXT:    and a0, a0, a2
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, %mask
   %r = xor i32 %y, %n1 ; swapped
@@ -386,33 +271,19 @@ define i32 @in_commutativity_0_1_0(i32 %x, i32 %y, i32 %mask) {
 }
 
 define i32 @in_commutativity_0_1_1(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: in_commutativity_0_1_1:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    and a0, a2, a0
-; RV32I-NEXT:    xor a0, a1, a0
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_commutativity_0_1_1:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    and a0, a2, a0
-; RV64I-NEXT:    xor a0, a1, a0
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_commutativity_0_1_1:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a1, a1, a2
-; RV32ZBB-NEXT:    and a0, a0, a2
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_commutativity_0_1_1:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a1, a1, a2
-; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_commutativity_0_1_1:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    and a0, a2, a0
+; CHECK-I-NEXT:    xor a0, a1, a0
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_commutativity_0_1_1:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    andn a1, a1, a2
+; CHECK-ZBB-NEXT:    and a0, a0, a2
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %n0 = xor i32 %x, %y
   %n1 = and i32 %mask, %n0 ; swapped
   %r = xor i32 %y, %n1 ; swapped
@@ -420,33 +291,19 @@ define i32 @in_commutativity_0_1_1(i32 %x, i32 %y, i32 %mask) {
 }
 
 define i32 @in_commutativity_1_0_0(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: in_commutativity_1_0_0:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    xor a1, a0, a1
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    xor a0, a1, a0
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_commutativity_1_0_0:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    xor a1, a0, a1
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    xor a0, a1, a0
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_commutativity_1_0_0:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a0, a0, a2
-; RV32ZBB-NEXT:    and a1, a1, a2
-; RV32ZBB-NEXT:    or a0, a1, a0
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_commutativity_1_0_0:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a0, a0, a2
-; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    or a0, a1, a0
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_commutativity_1_0_0:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    xor a1, a0, a1
+; CHECK-I-NEXT:    and a1, a1, a2
+; CHECK-I-NEXT:    xor a0, a1, a0
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_commutativity_1_0_0:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    andn a0, a0, a2
+; CHECK-ZBB-NEXT:    and a1, a1, a2
+; CHECK-ZBB-NEXT:    or a0, a1, a0
+; CHECK-ZBB-NEXT:    ret
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, %mask
   %r = xor i32 %n1, %x ; %x instead of %y
@@ -454,33 +311,19 @@ define i32 @in_commutativity_1_0_0(i32 %x, i32 %y, i32 %mask) {
 }
 
 define i32 @in_commutativity_1_0_1(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: in_commutativity_1_0_1:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    xor a1, a0, a1
-; RV32I-NEXT:    and a1, a2, a1
-; RV32I-NEXT:    xor a0, a1, a0
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_commutativity_1_0_1:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    xor a1, a0, a1
-; RV64I-NEXT:    and a1, a2, a1
-; RV64I-NEXT:    xor a0, a1, a0
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_commutativity_1_0_1:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a0, a0, a2
-; RV32ZBB-NEXT:    and a1, a1, a2
-; RV32ZBB-NEXT:    or a0, a1, a0
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_commutativity_1_0_1:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a0, a0, a2
-; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    or a0, a1, a0
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_commutativity_1_0_1:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    xor a1, a0, a1
+; CHECK-I-NEXT:    and a1, a2, a1
+; CHECK-I-NEXT:    xor a0, a1, a0
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_commutativity_1_0_1:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    andn a0, a0, a2
+; CHECK-ZBB-NEXT:    and a1, a1, a2
+; CHECK-ZBB-NEXT:    or a0, a1, a0
+; CHECK-ZBB-NEXT:    ret
   %n0 = xor i32 %x, %y
   %n1 = and i32 %mask, %n0 ; swapped
   %r = xor i32 %n1, %x ; %x instead of %y
@@ -488,33 +331,19 @@ define i32 @in_commutativity_1_0_1(i32 %x, i32 %y, i32 %mask) {
 }
 
 define i32 @in_commutativity_1_1_0(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: in_commutativity_1_1_0:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    xor a1, a0, a1
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_commutativity_1_1_0:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    xor a1, a0, a1
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_commutativity_1_1_0:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a0, a0, a2
-; RV32ZBB-NEXT:    and a1, a1, a2
-; RV32ZBB-NEXT:    or a0, a1, a0
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_commutativity_1_1_0:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a0, a0, a2
-; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    or a0, a1, a0
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_commutativity_1_1_0:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    xor a1, a0, a1
+; CHECK-I-NEXT:    and a1, a1, a2
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_commutativity_1_1_0:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    andn a0, a0, a2
+; CHECK-ZBB-NEXT:    and a1, a1, a2
+; CHECK-ZBB-NEXT:    or a0, a1, a0
+; CHECK-ZBB-NEXT:    ret
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, %mask
   %r = xor i32 %x, %n1 ; swapped, %x instead of %y
@@ -522,33 +351,19 @@ define i32 @in_commutativity_1_1_0(i32 %x, i32 %y, i32 %mask) {
 }
 
 define i32 @in_commutativity_1_1_1(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: in_commutativity_1_1_1:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    xor a1, a0, a1
-; RV32I-NEXT:    and a1, a2, a1
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_commutativity_1_1_1:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    xor a1, a0, a1
-; RV64I-NEXT:    and a1, a2, a1
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_commutativity_1_1_1:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a0, a0, a2
-; RV32ZBB-NEXT:    and a1, a1, a2
-; RV32ZBB-NEXT:    or a0, a1, a0
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_commutativity_1_1_1:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a0, a0, a2
-; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    or a0, a1, a0
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_commutativity_1_1_1:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    xor a1, a0, a1
+; CHECK-I-NEXT:    and a1, a2, a1
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_commutativity_1_1_1:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    andn a0, a0, a2
+; CHECK-ZBB-NEXT:    and a1, a1, a2
+; CHECK-ZBB-NEXT:    or a0, a1, a0
+; CHECK-ZBB-NEXT:    ret
   %n0 = xor i32 %x, %y
   %n1 = and i32 %mask, %n0 ; swapped
   %r = xor i32 %x, %n1 ; swapped, %x instead of %y
@@ -560,37 +375,21 @@ define i32 @in_commutativity_1_1_1(i32 %x, i32 %y, i32 %mask) {
 ; ============================================================================ ;
 
 define i32 @in_complex_y0(i32 %x, i32 %y_hi, i32 %y_low, i32 %mask) {
-; RV32I-LABEL: in_complex_y0:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    and a0, a0, a3
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_complex_y0:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    and a0, a0, a3
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_complex_y0:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    and a1, a1, a2
-; RV32ZBB-NEXT:    and a0, a0, a3
-; RV32ZBB-NEXT:    andn a1, a1, a3
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_complex_y0:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    and a0, a0, a3
-; RV64ZBB-NEXT:    andn a1, a1, a3
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_complex_y0:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    and a1, a1, a2
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    and a0, a0, a3
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_complex_y0:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    and a1, a1, a2
+; CHECK-ZBB-NEXT:    and a0, a0, a3
+; CHECK-ZBB-NEXT:    andn a1, a1, a3
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %y = and i32 %y_hi, %y_low
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, %mask
@@ -599,37 +398,21 @@ define i32 @in_complex_y0(i32 %x, i32 %y_hi, i32 %y_low, i32 %mask) {
 }
 
 define i32 @in_complex_y1(i32 %x, i32 %y_hi, i32 %y_low, i32 %mask) {
-; RV32I-LABEL: in_complex_y1:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    and a0, a0, a3
-; RV32I-NEXT:    xor a0, a1, a0
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_complex_y1:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    and a0, a0, a3
-; RV64I-NEXT:    xor a0, a1, a0
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_complex_y1:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    and a1, a1, a2
-; RV32ZBB-NEXT:    and a0, a0, a3
-; RV32ZBB-NEXT:    andn a1, a1, a3
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_complex_y1:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    and a0, a0, a3
-; RV64ZBB-NEXT:    andn a1, a1, a3
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_complex_y1:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    and a1, a1, a2
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    and a0, a0, a3
+; CHECK-I-NEXT:    xor a0, a1, a0
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_complex_y1:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    and a1, a1, a2
+; CHECK-ZBB-NEXT:    and a0, a0, a3
+; CHECK-ZBB-NEXT:    andn a1, a1, a3
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %y = and i32 %y_hi, %y_low
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, %mask
@@ -642,123 +425,73 @@ define i32 @in_complex_y1(i32 %x, i32 %y_hi, i32 %y_low, i32 %mask) {
 ; ============================================================================ ;
 
 define i32 @in_complex_m0(i32 %x, i32 %y, i32 %m_a, i32 %m_b) {
-; RV32I-LABEL: in_complex_m0:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    xor a2, a2, a3
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    and a0, a0, a2
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_complex_m0:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    xor a2, a2, a3
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_complex_m0:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    xor a2, a2, a3
-; RV32ZBB-NEXT:    andn a1, a1, a2
-; RV32ZBB-NEXT:    and a0, a0, a2
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_complex_m0:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    xor a2, a2, a3
-; RV64ZBB-NEXT:    andn a1, a1, a2
-; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_complex_m0:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    xor a2, a2, a3
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    and a0, a0, a2
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_complex_m0:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-ZBB-NEXT:    andn a1, a1, a2
+; CHECK-ZBB-NEXT:    and a0, a0, a2
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %mask = xor i32 %m_a, %m_b
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, %mask
   %r = xor i32 %n1, %y
-  ret i32 %r
-}
-
-define i32 @in_complex_m1(i32 %x, i32 %y, i32 %m_a, i32 %m_b) {
-; RV32I-LABEL: in_complex_m1:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    xor a2, a2, a3
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    and a0, a2, a0
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_complex_m1:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    xor a2, a2, a3
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    and a0, a2, a0
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_complex_m1:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    xor a2, a2, a3
-; RV32ZBB-NEXT:    andn a1, a1, a2
-; RV32ZBB-NEXT:    and a0, a0, a2
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_complex_m1:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    xor a2, a2, a3
-; RV64ZBB-NEXT:    andn a1, a1, a2
-; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
-  %mask = xor i32 %m_a, %m_b
-  %n0 = xor i32 %x, %y
-  %n1 = and i32 %mask, %n0
-  %r = xor i32 %n1, %y
-  ret i32 %r
-}
-
-; ============================================================================ ;
-; Both Y and M are complex.
-; ============================================================================ ;
-
-define i32 @in_complex_y0_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) {
-; RV32I-LABEL: in_complex_y0_m0:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    xor a2, a3, a4
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    and a0, a0, a2
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_complex_y0_m0:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    xor a2, a3, a4
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_complex_y0_m0:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    and a1, a1, a2
-; RV32ZBB-NEXT:    xor a2, a3, a4
-; RV32ZBB-NEXT:    andn a1, a1, a2
-; RV32ZBB-NEXT:    and a0, a0, a2
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_complex_y0_m0:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    xor a2, a3, a4
-; RV64ZBB-NEXT:    andn a1, a1, a2
-; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+  ret i32 %r
+}
+
+define i32 @in_complex_m1(i32 %x, i32 %y, i32 %m_a, i32 %m_b) {
+; CHECK-I-LABEL: in_complex_m1:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    xor a2, a2, a3
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    and a0, a2, a0
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_complex_m1:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    xor a2, a2, a3
+; CHECK-ZBB-NEXT:    andn a1, a1, a2
+; CHECK-ZBB-NEXT:    and a0, a0, a2
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
+  %mask = xor i32 %m_a, %m_b
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %mask, %n0
+  %r = xor i32 %n1, %y
+  ret i32 %r
+}
+
+; ============================================================================ ;
+; Both Y and M are complex.
+; ============================================================================ ;
+
+define i32 @in_complex_y0_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) {
+; CHECK-I-LABEL: in_complex_y0_m0:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    and a1, a1, a2
+; CHECK-I-NEXT:    xor a2, a3, a4
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    and a0, a0, a2
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_complex_y0_m0:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    and a1, a1, a2
+; CHECK-ZBB-NEXT:    xor a2, a3, a4
+; CHECK-ZBB-NEXT:    andn a1, a1, a2
+; CHECK-ZBB-NEXT:    and a0, a0, a2
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %y = and i32 %y_hi, %y_low
   %mask = xor i32 %m_a, %m_b
   %n0 = xor i32 %x, %y
@@ -768,41 +501,23 @@ define i32 @in_complex_y0_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b)
 }
 
 define i32 @in_complex_y1_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) {
-; RV32I-LABEL: in_complex_y1_m0:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    xor a2, a3, a4
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    and a0, a0, a2
-; RV32I-NEXT:    xor a0, a1, a0
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_complex_y1_m0:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    xor a2, a3, a4
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    xor a0, a1, a0
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_complex_y1_m0:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    and a1, a1, a2
-; RV32ZBB-NEXT:    xor a2, a3, a4
-; RV32ZBB-NEXT:    andn a1, a1, a2
-; RV32ZBB-NEXT:    and a0, a0, a2
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_complex_y1_m0:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    xor a2, a3, a4
-; RV64ZBB-NEXT:    andn a1, a1, a2
-; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_complex_y1_m0:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    and a1, a1, a2
+; CHECK-I-NEXT:    xor a2, a3, a4
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    and a0, a0, a2
+; CHECK-I-NEXT:    xor a0, a1, a0
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_complex_y1_m0:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    and a1, a1, a2
+; CHECK-ZBB-NEXT:    xor a2, a3, a4
+; CHECK-ZBB-NEXT:    andn a1, a1, a2
+; CHECK-ZBB-NEXT:    and a0, a0, a2
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %y = and i32 %y_hi, %y_low
   %mask = xor i32 %m_a, %m_b
   %n0 = xor i32 %x, %y
@@ -812,41 +527,23 @@ define i32 @in_complex_y1_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b)
 }
 
 define i32 @in_complex_y0_m1(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) {
-; RV32I-LABEL: in_complex_y0_m1:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    xor a2, a3, a4
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    and a0, a2, a0
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_complex_y0_m1:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    xor a2, a3, a4
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    and a0, a2, a0
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_complex_y0_m1:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    and a1, a1, a2
-; RV32ZBB-NEXT:    xor a2, a3, a4
-; RV32ZBB-NEXT:    andn a1, a1, a2
-; RV32ZBB-NEXT:    and a0, a0, a2
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_complex_y0_m1:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    xor a2, a3, a4
-; RV64ZBB-NEXT:    andn a1, a1, a2
-; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_complex_y0_m1:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    and a1, a1, a2
+; CHECK-I-NEXT:    xor a2, a3, a4
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    and a0, a2, a0
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_complex_y0_m1:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    and a1, a1, a2
+; CHECK-ZBB-NEXT:    xor a2, a3, a4
+; CHECK-ZBB-NEXT:    andn a1, a1, a2
+; CHECK-ZBB-NEXT:    and a0, a0, a2
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %y = and i32 %y_hi, %y_low
   %mask = xor i32 %m_a, %m_b
   %n0 = xor i32 %x, %y
@@ -856,41 +553,23 @@ define i32 @in_complex_y0_m1(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b)
 }
 
 define i32 @in_complex_y1_m1(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) {
-; RV32I-LABEL: in_complex_y1_m1:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    xor a2, a3, a4
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    and a0, a2, a0
-; RV32I-NEXT:    xor a0, a1, a0
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_complex_y1_m1:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    xor a2, a3, a4
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    and a0, a2, a0
-; RV64I-NEXT:    xor a0, a1, a0
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_complex_y1_m1:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    and a1, a1, a2
-; RV32ZBB-NEXT:    xor a2, a3, a4
-; RV32ZBB-NEXT:    andn a1, a1, a2
-; RV32ZBB-NEXT:    and a0, a0, a2
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_complex_y1_m1:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    xor a2, a3, a4
-; RV64ZBB-NEXT:    andn a1, a1, a2
-; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_complex_y1_m1:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    and a1, a1, a2
+; CHECK-I-NEXT:    xor a2, a3, a4
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    and a0, a2, a0
+; CHECK-I-NEXT:    xor a0, a1, a0
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_complex_y1_m1:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    and a1, a1, a2
+; CHECK-ZBB-NEXT:    xor a2, a3, a4
+; CHECK-ZBB-NEXT:    andn a1, a1, a2
+; CHECK-ZBB-NEXT:    and a0, a0, a2
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %y = and i32 %y_hi, %y_low
   %mask = xor i32 %m_a, %m_b
   %n0 = xor i32 %x, %y
@@ -904,31 +583,18 @@ define i32 @in_complex_y1_m1(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b)
 ; ============================================================================ ;
 
 define i32 @out_constant_varx_mone(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: out_constant_varx_mone:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a1, a2
-; RV32I-NEXT:    and a0, a2, a0
-; RV32I-NEXT:    or a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: out_constant_varx_mone:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a1, a2
-; RV64I-NEXT:    and a0, a2, a0
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: out_constant_varx_mone:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    and a0, a2, a0
-; RV32ZBB-NEXT:    orn a0, a0, a2
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: out_constant_varx_mone:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    and a0, a2, a0
-; RV64ZBB-NEXT:    orn a0, a0, a2
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: out_constant_varx_mone:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    not a1, a2
+; CHECK-I-NEXT:    and a0, a2, a0
+; CHECK-I-NEXT:    or a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: out_constant_varx_mone:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    and a0, a2, a0
+; CHECK-ZBB-NEXT:    orn a0, a0, a2
+; CHECK-ZBB-NEXT:    ret
   %notmask = xor i32 %mask, -1
   %mx = and i32 %mask, %x
   %my = and i32 %notmask, -1
@@ -937,31 +603,18 @@ define i32 @out_constant_varx_mone(i32 %x, i32 %y, i32 %mask) {
 }
 
 define i32 @in_constant_varx_mone(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: in_constant_varx_mone:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    and a0, a0, a2
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_constant_varx_mone:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_constant_varx_mone:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a0, a2, a0
-; RV32ZBB-NEXT:    not a0, a0
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_constant_varx_mone:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a0, a2, a0
-; RV64ZBB-NEXT:    not a0, a0
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_constant_varx_mone:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    not a0, a0
+; CHECK-I-NEXT:    and a0, a0, a2
+; CHECK-I-NEXT:    not a0, a0
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_constant_varx_mone:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    andn a0, a2, a0
+; CHECK-ZBB-NEXT:    not a0, a0
+; CHECK-ZBB-NEXT:    ret
   %n0 = xor i32 %x, -1 ; %x
   %n1 = and i32 %n0, %mask
   %r = xor i32 %n1, -1
@@ -983,35 +636,20 @@ define i32 @out_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) {
 
 ; This is not a canonical form. Testing for completeness only.
 define i32 @in_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: in_constant_varx_mone_invmask:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a1, a2
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    and a0, a0, a1
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_constant_varx_mone_invmask:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a1, a2
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_constant_varx_mone_invmask:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    not a0, a0
-; RV32ZBB-NEXT:    andn a0, a0, a2
-; RV32ZBB-NEXT:    not a0, a0
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_constant_varx_mone_invmask:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    not a0, a0
-; RV64ZBB-NEXT:    andn a0, a0, a2
-; RV64ZBB-NEXT:    not a0, a0
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_constant_varx_mone_invmask:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    not a1, a2
+; CHECK-I-NEXT:    not a0, a0
+; CHECK-I-NEXT:    and a0, a0, a1
+; CHECK-I-NEXT:    not a0, a0
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_constant_varx_mone_invmask:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    not a0, a0
+; CHECK-ZBB-NEXT:    andn a0, a0, a2
+; CHECK-ZBB-NEXT:    not a0, a0
+; CHECK-ZBB-NEXT:    ret
   %notmask = xor i32 %mask, -1
   %n0 = xor i32 %x, -1 ; %x
   %n1 = and i32 %n0, %notmask
@@ -1020,37 +658,21 @@ define i32 @in_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) {
 }
 
 define i32 @out_constant_varx_42(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: out_constant_varx_42:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a1, a2
-; RV32I-NEXT:    and a0, a2, a0
-; RV32I-NEXT:    andi a1, a1, 42
-; RV32I-NEXT:    or a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: out_constant_varx_42:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a1, a2
-; RV64I-NEXT:    and a0, a2, a0
-; RV64I-NEXT:    andi a1, a1, 42
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: out_constant_varx_42:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    and a0, a2, a0
-; RV32ZBB-NEXT:    li a1, 42
-; RV32ZBB-NEXT:    andn a1, a1, a2
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: out_constant_varx_42:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    and a0, a2, a0
-; RV64ZBB-NEXT:    li a1, 42
-; RV64ZBB-NEXT:    andn a1, a1, a2
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: out_constant_varx_42:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    not a1, a2
+; CHECK-I-NEXT:    and a0, a2, a0
+; CHECK-I-NEXT:    andi a1, a1, 42
+; CHECK-I-NEXT:    or a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: out_constant_varx_42:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    and a0, a2, a0
+; CHECK-ZBB-NEXT:    li a1, 42
+; CHECK-ZBB-NEXT:    andn a1, a1, a2
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %notmask = xor i32 %mask, -1
   %mx = and i32 %mask, %x
   %my = and i32 %notmask, 42
@@ -1059,33 +681,19 @@ define i32 @out_constant_varx_42(i32 %x, i32 %y, i32 %mask) {
 }
 
 define i32 @in_constant_varx_42(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: in_constant_varx_42:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    xori a0, a0, 42
-; RV32I-NEXT:    and a0, a0, a2
-; RV32I-NEXT:    xori a0, a0, 42
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_constant_varx_42:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    xori a0, a0, 42
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    xori a0, a0, 42
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_constant_varx_42:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a0, a2, a0
-; RV32ZBB-NEXT:    ori a1, a2, 42
-; RV32ZBB-NEXT:    andn a0, a1, a0
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_constant_varx_42:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a0, a2, a0
-; RV64ZBB-NEXT:    ori a1, a2, 42
-; RV64ZBB-NEXT:    andn a0, a1, a0
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_constant_varx_42:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    xori a0, a0, 42
+; CHECK-I-NEXT:    and a0, a0, a2
+; CHECK-I-NEXT:    xori a0, a0, 42
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_constant_varx_42:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    andn a0, a2, a0
+; CHECK-ZBB-NEXT:    ori a1, a2, 42
+; CHECK-ZBB-NEXT:    andn a0, a1, a0
+; CHECK-ZBB-NEXT:    ret
   %n0 = xor i32 %x, 42 ; %x
   %n1 = and i32 %n0, %mask
   %r = xor i32 %n1, 42
@@ -1094,35 +702,20 @@ define i32 @in_constant_varx_42(i32 %x, i32 %y, i32 %mask) {
 
 ; This is not a canonical form. Testing for completeness only.
 define i32 @out_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: out_constant_varx_42_invmask:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a1, a2
-; RV32I-NEXT:    and a0, a1, a0
-; RV32I-NEXT:    andi a1, a2, 42
-; RV32I-NEXT:    or a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: out_constant_varx_42_invmask:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a1, a2
-; RV64I-NEXT:    and a0, a1, a0
-; RV64I-NEXT:    andi a1, a2, 42
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: out_constant_varx_42_invmask:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a0, a0, a2
-; RV32ZBB-NEXT:    andi a1, a2, 42
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: out_constant_varx_42_invmask:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a0, a0, a2
-; RV64ZBB-NEXT:    andi a1, a2, 42
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: out_constant_varx_42_invmask:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    not a1, a2
+; CHECK-I-NEXT:    and a0, a1, a0
+; CHECK-I-NEXT:    andi a1, a2, 42
+; CHECK-I-NEXT:    or a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: out_constant_varx_42_invmask:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    andn a0, a0, a2
+; CHECK-ZBB-NEXT:    andi a1, a2, 42
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %notmask = xor i32 %mask, -1
   %mx = and i32 %notmask, %x
   %my = and i32 %mask, 42
@@ -1132,35 +725,20 @@ define i32 @out_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) {
 
 ; This is not a canonical form. Testing for completeness only.
 define i32 @in_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: in_constant_varx_42_invmask:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a1, a2
-; RV32I-NEXT:    xori a0, a0, 42
-; RV32I-NEXT:    and a0, a0, a1
-; RV32I-NEXT:    xori a0, a0, 42
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_constant_varx_42_invmask:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a1, a2
-; RV64I-NEXT:    xori a0, a0, 42
-; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    xori a0, a0, 42
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_constant_varx_42_invmask:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a0, a0, a2
-; RV32ZBB-NEXT:    andi a1, a2, 42
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_constant_varx_42_invmask:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a0, a0, a2
-; RV64ZBB-NEXT:    andi a1, a2, 42
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_constant_varx_42_invmask:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    not a1, a2
+; CHECK-I-NEXT:    xori a0, a0, 42
+; CHECK-I-NEXT:    and a0, a0, a1
+; CHECK-I-NEXT:    xori a0, a0, 42
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_constant_varx_42_invmask:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    andn a0, a0, a2
+; CHECK-ZBB-NEXT:    andi a1, a2, 42
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %notmask = xor i32 %mask, -1
   %n0 = xor i32 %x, 42 ; %x
   %n1 = and i32 %n0, %notmask
@@ -1181,31 +759,18 @@ define i32 @out_constant_mone_vary(i32 %x, i32 %y, i32 %mask) {
 }
 
 define i32 @in_constant_mone_vary(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: in_constant_mone_vary:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a0, a1
-; RV32I-NEXT:    and a0, a0, a2
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_constant_mone_vary:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a0, a1
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_constant_mone_vary:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a0, a2, a1
-; RV32ZBB-NEXT:    xor a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_constant_mone_vary:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a0, a2, a1
-; RV64ZBB-NEXT:    xor a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_constant_mone_vary:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    not a0, a1
+; CHECK-I-NEXT:    and a0, a0, a2
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_constant_mone_vary:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    andn a0, a2, a1
+; CHECK-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %n0 = xor i32 -1, %y ; %x
   %n1 = and i32 %n0, %mask
   %r = xor i32 %n1, %y
@@ -1214,31 +779,18 @@ define i32 @in_constant_mone_vary(i32 %x, i32 %y, i32 %mask) {
 
 ; This is not a canonical form. Testing for completeness only.
 define i32 @out_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: out_constant_mone_vary_invmask:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a0, a2
-; RV32I-NEXT:    and a1, a2, a1
-; RV32I-NEXT:    or a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: out_constant_mone_vary_invmask:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a0, a2
-; RV64I-NEXT:    and a1, a2, a1
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: out_constant_mone_vary_invmask:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    and a0, a2, a1
-; RV32ZBB-NEXT:    orn a0, a0, a2
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: out_constant_mone_vary_invmask:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    and a0, a2, a1
-; RV64ZBB-NEXT:    orn a0, a0, a2
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: out_constant_mone_vary_invmask:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    not a0, a2
+; CHECK-I-NEXT:    and a1, a2, a1
+; CHECK-I-NEXT:    or a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: out_constant_mone_vary_invmask:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    and a0, a2, a1
+; CHECK-ZBB-NEXT:    orn a0, a0, a2
+; CHECK-ZBB-NEXT:    ret
   %notmask = xor i32 %mask, -1
   %mx = and i32 %notmask, -1
   %my = and i32 %mask, %y
@@ -1248,35 +800,20 @@ define i32 @out_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) {
 
 ; This is not a canonical form. Testing for completeness only.
 define i32 @in_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: in_constant_mone_vary_invmask:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a0, a2
-; RV32I-NEXT:    not a2, a1
-; RV32I-NEXT:    and a0, a2, a0
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_constant_mone_vary_invmask:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a0, a2
-; RV64I-NEXT:    not a2, a1
-; RV64I-NEXT:    and a0, a2, a0
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_constant_mone_vary_invmask:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    not a0, a1
-; RV32ZBB-NEXT:    andn a0, a0, a2
-; RV32ZBB-NEXT:    xor a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_constant_mone_vary_invmask:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    not a0, a1
-; RV64ZBB-NEXT:    andn a0, a0, a2
-; RV64ZBB-NEXT:    xor a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_constant_mone_vary_invmask:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    not a0, a2
+; CHECK-I-NEXT:    not a2, a1
+; CHECK-I-NEXT:    and a0, a2, a0
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_constant_mone_vary_invmask:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    not a0, a1
+; CHECK-ZBB-NEXT:    andn a0, a0, a2
+; CHECK-ZBB-NEXT:    xor a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %notmask = xor i32 %mask, -1
   %n0 = xor i32 -1, %y ; %x
   %n1 = and i32 %n0, %notmask
@@ -1285,35 +822,20 @@ define i32 @in_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) {
 }
 
 define i32 @out_constant_42_vary(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: out_constant_42_vary:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a0, a2
-; RV32I-NEXT:    andi a2, a2, 42
-; RV32I-NEXT:    and a0, a0, a1
-; RV32I-NEXT:    or a0, a2, a0
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: out_constant_42_vary:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a0, a2
-; RV64I-NEXT:    andi a2, a2, 42
-; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    or a0, a2, a0
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: out_constant_42_vary:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andi a0, a2, 42
-; RV32ZBB-NEXT:    andn a1, a1, a2
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: out_constant_42_vary:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andi a0, a2, 42
-; RV64ZBB-NEXT:    andn a1, a1, a2
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: out_constant_42_vary:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    not a0, a2
+; CHECK-I-NEXT:    andi a2, a2, 42
+; CHECK-I-NEXT:    and a0, a0, a1
+; CHECK-I-NEXT:    or a0, a2, a0
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: out_constant_42_vary:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    andi a0, a2, 42
+; CHECK-ZBB-NEXT:    andn a1, a1, a2
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %notmask = xor i32 %mask, -1
   %mx = and i32 %mask, 42
   %my = and i32 %notmask, %y
@@ -1322,33 +844,19 @@ define i32 @out_constant_42_vary(i32 %x, i32 %y, i32 %mask) {
 }
 
 define i32 @in_constant_42_vary(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: in_constant_42_vary:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    xori a0, a1, 42
-; RV32I-NEXT:    and a0, a0, a2
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_constant_42_vary:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    xori a0, a1, 42
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_constant_42_vary:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a0, a1, a2
-; RV32ZBB-NEXT:    andi a1, a2, 42
-; RV32ZBB-NEXT:    or a0, a1, a0
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_constant_42_vary:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a0, a1, a2
-; RV64ZBB-NEXT:    andi a1, a2, 42
-; RV64ZBB-NEXT:    or a0, a1, a0
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_constant_42_vary:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    xori a0, a1, 42
+; CHECK-I-NEXT:    and a0, a0, a2
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_constant_42_vary:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    andn a0, a1, a2
+; CHECK-ZBB-NEXT:    andi a1, a2, 42
+; CHECK-ZBB-NEXT:    or a0, a1, a0
+; CHECK-ZBB-NEXT:    ret
   %n0 = xor i32 42, %y ; %x
   %n1 = and i32 %n0, %mask
   %r = xor i32 %n1, %y
@@ -1357,37 +865,21 @@ define i32 @in_constant_42_vary(i32 %x, i32 %y, i32 %mask) {
 
 ; This is not a canonical form. Testing for completeness only.
 define i32 @out_constant_42_vary_invmask(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: out_constant_42_vary_invmask:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a0, a2
-; RV32I-NEXT:    andi a0, a0, 42
-; RV32I-NEXT:    and a1, a2, a1
-; RV32I-NEXT:    or a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: out_constant_42_vary_invmask:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a0, a2
-; RV64I-NEXT:    andi a0, a0, 42
-; RV64I-NEXT:    and a1, a2, a1
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: out_constant_42_vary_invmask:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    li a0, 42
-; RV32ZBB-NEXT:    andn a0, a0, a2
-; RV32ZBB-NEXT:    and a1, a2, a1
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: out_constant_42_vary_invmask:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    li a0, 42
-; RV64ZBB-NEXT:    andn a0, a0, a2
-; RV64ZBB-NEXT:    and a1, a2, a1
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: out_constant_42_vary_invmask:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    not a0, a2
+; CHECK-I-NEXT:    andi a0, a0, 42
+; CHECK-I-NEXT:    and a1, a2, a1
+; CHECK-I-NEXT:    or a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: out_constant_42_vary_invmask:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    li a0, 42
+; CHECK-ZBB-NEXT:    andn a0, a0, a2
+; CHECK-ZBB-NEXT:    and a1, a2, a1
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %notmask = xor i32 %mask, -1
   %mx = and i32 %notmask, 42
   %my = and i32 %mask, %y
@@ -1397,35 +889,20 @@ define i32 @out_constant_42_vary_invmask(i32 %x, i32 %y, i32 %mask) {
 
 ; This is not a canonical form. Testing for completeness only.
 define i32 @in_constant_42_vary_invmask(i32 %x, i32 %y, i32 %mask) {
-; RV32I-LABEL: in_constant_42_vary_invmask:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a0, a2
-; RV32I-NEXT:    xori a2, a1, 42
-; RV32I-NEXT:    and a0, a2, a0
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: in_constant_42_vary_invmask:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a0, a2
-; RV64I-NEXT:    xori a2, a1, 42
-; RV64I-NEXT:    and a0, a2, a0
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: in_constant_42_vary_invmask:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a0, a2, a1
-; RV32ZBB-NEXT:    ori a1, a2, 42
-; RV32ZBB-NEXT:    andn a0, a1, a0
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: in_constant_42_vary_invmask:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a0, a2, a1
-; RV64ZBB-NEXT:    ori a1, a2, 42
-; RV64ZBB-NEXT:    andn a0, a1, a0
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: in_constant_42_vary_invmask:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    not a0, a2
+; CHECK-I-NEXT:    xori a2, a1, 42
+; CHECK-I-NEXT:    and a0, a2, a0
+; CHECK-I-NEXT:    xor a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: in_constant_42_vary_invmask:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    andn a0, a2, a1
+; CHECK-ZBB-NEXT:    ori a1, a2, 42
+; CHECK-ZBB-NEXT:    andn a0, a1, a0
+; CHECK-ZBB-NEXT:    ret
   %notmask = xor i32 %mask, -1
   %n0 = xor i32 42, %y ; %x
   %n1 = and i32 %n0, %notmask
@@ -1525,35 +1002,20 @@ define i32 @in_multiuse_B(i32 %x, i32 %y, i32 %z, i32 %mask) nounwind {
 
 ; Various bad variants
 define i32 @n0_badmask(i32 %x, i32 %y, i32 %mask, i32 %mask2) {
-; RV32I-LABEL: n0_badmask:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    and a0, a0, a2
-; RV32I-NEXT:    not a2, a3
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    or a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV64I-LABEL: n0_badmask:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    not a2, a3
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV32ZBB-LABEL: n0_badmask:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    and a0, a0, a2
-; RV32ZBB-NEXT:    andn a1, a1, a3
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV64ZBB-LABEL: n0_badmask:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    andn a1, a1, a3
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
+; CHECK-I-LABEL: n0_badmask:
+; CHECK-I:       # %bb.0:
+; CHECK-I-NEXT:    and a0, a0, a2
+; CHECK-I-NEXT:    not a2, a3
+; CHECK-I-NEXT:    and a1, a1, a2
+; CHECK-I-NEXT:    or a0, a0, a1
+; CHECK-I-NEXT:    ret
+;
+; CHECK-ZBB-LABEL: n0_badmask:
+; CHECK-ZBB:       # %bb.0:
+; CHECK-ZBB-NEXT:    and a0, a0, a2
+; CHECK-ZBB-NEXT:    andn a1, a1, a3
+; CHECK-ZBB-NEXT:    or a0, a0, a1
+; CHECK-ZBB-NEXT:    ret
   %mx = and i32 %x, %mask
   %notmask = xor i32 %mask2, -1 ; %mask2 instead of %mask
   %my = and i32 %y, %notmask
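
For reference, the mechanism that removes the duplication above is FileCheck's
--check-prefixes option combined with update_llc_test_checks.py: when several
RUN lines share a check prefix and a function compiles to identical code under
all of them, the script emits one shared assertion block instead of one block
per RUN line. A minimal sketch of the pattern (the RUN lines and the function
below are illustrative only, not copied from the patch):

; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-I
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-I
; RUN: llc -mtriple=riscv32 -mattr=+zbb -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-ZBB
; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-ZBB

define i32 @masked_merge_example(i32 %x, i32 %y, i32 %mask) {
; RV32 and RV64 lower this to the same and/not/and/or sequence (and/andn/or
; with Zbb), so the update script emits a single CHECK-I body and a single
; CHECK-ZBB body rather than four per-target bodies.
  %mx = and i32 %x, %mask
  %notmask = xor i32 %mask, -1
  %my = and i32 %y, %notmask
  %r = or i32 %mx, %my
  ret i32 %r
}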
