[llvm] [RISCV] Disable splitValueIntoRegisterParts/joinRegisterPartsIntoValue for scalable vectors with same element count. (PR #171243)

Mon Dec 8 17:47:18 PST 2025

https://github.com/topperc created https://github.com/llvm/llvm-project/pull/171243

This code was incorrectly handling v16i4->v16i8 arguments/returns which should be
an any_extend to match type legalization. Instead we widened
v16i4 -> v32i4 then bit casted to v16i8. This merged pairs of i4
elements into an i8 element instead of keeping them as separate
elements that have been extended to i8.

I think the scalable vector handling in these functions is a hack
for inline assembly of fractional LMUL types. I'm not sure there
aren't more bugs here. The fundamental issue is that inline assembly
uses the first type from the register class as the PartVT. For LMUL=1
and fractional LMUL types we use the VR register class which has
nxv8i8 as the first type. So we have to convert all fractional types
to nxv8i8, but we don't know we're handling inline assembly. If it's
an argument or return, widening and bitcasting may not be correct.
    
Fixes the first issue in #171141.

>From 9f6186c41f8dfbbf4e313cbcba807425f7f494f5 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 8 Dec 2025 14:07:45 -0800
Subject: [PATCH 1/3] [RISCV] Add VMNoV0 register class with only the VMaskVTs.

I plan to use this for inline assembly "vd" contraints with mask
types in a follow up patch. Due to the test changes I wanted to
post this separately.
---
 llvm/lib/Target/RISCV/RISCVRegisterInfo.td    |  1 +
 .../instruction-select/rvv/select.mir         | 20 +++++++++----------
 .../RISCV/rvv/pass-fast-math-flags-sdnode.ll  |  2 +-
 .../RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir  |  4 ++--
 .../rvv/strided-vpload-vpstore-output.ll      |  2 +-
 .../RISCV/rvv/vleff-vlseg2ff-output.ll        |  4 ++--
 .../CodeGen/RISCV/rvv/vmerge-peephole.mir     |  4 ++--
 .../CodeGen/RISCV/rvv/vmv.v.v-peephole.mir    |  6 +++---
 .../RISCV/rvv/vsetvli-insert-crossbb.mir      |  4 ++--
 9 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 11b7a0a3c691a..f354793eb0eac 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -813,6 +813,7 @@ def VMV0 : VReg<VMaskVTs, (add V0), 1>;
 
 // The register class is added for inline assembly for vector mask types.
 def VM : VReg<VMaskVTs, (add VR), 1>;
+def VMNoV0 : VReg<VMaskVTs, (sub VR, V0), 1>;
 
 defvar VTupM1N2VTs = [riscv_nxv8i8x2, riscv_nxv4i8x2, riscv_nxv2i8x2, riscv_nxv1i8x2];
 defvar VTupM1N3VTs = [riscv_nxv8i8x3, riscv_nxv4i8x3, riscv_nxv2i8x3, riscv_nxv1i8x3];
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir
index ada76a43639d7..b7cb295648b4e 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir
@@ -11,7 +11,7 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: select_nxv1i8
     ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
-    ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
+    ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vmnov0 = IMPLICIT_DEF
     ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
     ; RV32I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */
     ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]]
@@ -19,7 +19,7 @@ body:             |
     ;
     ; RV64I-LABEL: name: select_nxv1i8
     ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
-    ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
+    ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vmnov0 = IMPLICIT_DEF
     ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
     ; RV64I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */
     ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]]
@@ -40,7 +40,7 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: select_nxv4i8
     ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
-    ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
+    ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vmnov0 = IMPLICIT_DEF
     ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
     ; RV32I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */
     ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]]
@@ -48,7 +48,7 @@ body:             |
     ;
     ; RV64I-LABEL: name: select_nxv4i8
     ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
-    ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
+    ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vmnov0 = IMPLICIT_DEF
     ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
     ; RV64I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */
     ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]]
@@ -98,7 +98,7 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: select_nxv64i8
     ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
-    ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
+    ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vmnov0 = IMPLICIT_DEF
     ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
     ; RV32I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */
     ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]]
@@ -106,7 +106,7 @@ body:             |
     ;
     ; RV64I-LABEL: name: select_nxv64i8
     ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
-    ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
+    ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vmnov0 = IMPLICIT_DEF
     ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
     ; RV64I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */
     ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]]
@@ -127,7 +127,7 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: select_nxv2i16
     ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
-    ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
+    ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vmnov0 = IMPLICIT_DEF
     ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
     ; RV32I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */
     ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]]
@@ -135,7 +135,7 @@ body:             |
     ;
     ; RV64I-LABEL: name: select_nxv2i16
     ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
-    ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
+    ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vmnov0 = IMPLICIT_DEF
     ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
     ; RV64I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */
     ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]]
@@ -185,7 +185,7 @@ body:             |
   bb.0.entry:
     ; RV32I-LABEL: name: select_nxv32i16
     ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
-    ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
+    ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vmnov0 = IMPLICIT_DEF
     ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
     ; RV32I-NEXT: [[PseudoVMERGE_VVM_MF2_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */
     ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF2_]]
@@ -193,7 +193,7 @@ body:             |
     ;
     ; RV64I-LABEL: name: select_nxv32i16
     ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
-    ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
+    ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vmnov0 = IMPLICIT_DEF
     ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
     ; RV64I-NEXT: [[PseudoVMERGE_VVM_MF2_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */
     ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF2_]]
diff --git a/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll
index 0654fe8bd8d66..3225d649f066e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll
@@ -13,7 +13,7 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %x, <vscale x 1 x double
   ; CHECK-NEXT:   [[SLLI:%[0-9]+]]:gpr = SLLI [[COPY]], 32
   ; CHECK-NEXT:   [[SRLI:%[0-9]+]]:gprnox0 = SRLI killed [[SLLI]], 32
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vmv0 = COPY [[COPY1]]
-  ; CHECK-NEXT:   [[PseudoVFMUL_VV_M1_E64_MASK:%[0-9]+]]:vrnov0 = nnan ninf nsz arcp contract afn reassoc nofpexcept PseudoVFMUL_VV_M1_E64_MASK $noreg, [[COPY3]], [[COPY2]], [[COPY4]], 7, killed [[SRLI]], 6 /* e64 */, 1 /* ta, mu */, implicit $frm
+  ; CHECK-NEXT:   [[PseudoVFMUL_VV_M1_E64_MASK:%[0-9]+]]:vmnov0 = nnan ninf nsz arcp contract afn reassoc nofpexcept PseudoVFMUL_VV_M1_E64_MASK $noreg, [[COPY3]], [[COPY2]], [[COPY4]], 7, killed [[SRLI]], 6 /* e64 */, 1 /* ta, mu */, implicit $frm
   ; CHECK-NEXT:   $v8 = COPY [[PseudoVFMUL_VV_M1_E64_MASK]]
   ; CHECK-NEXT:   PseudoRET implicit $v8
   %1 = call fast <vscale x 1 x double> @llvm.vp.fmul.nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x double> %y, <vscale x 1 x i1> %m, i32 %vl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir
index c73c2004834db..ece457a09dbdf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir
@@ -11,7 +11,7 @@ body: |
     ; CHECK: liveins: $x1, $v8, $v9
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %false:vrnov0 = COPY $v8
-    ; CHECK-NEXT: %true:vrnov0 = COPY $v9
+    ; CHECK-NEXT: %true:vmnov0 = COPY $v9
     ; CHECK-NEXT: %avl:gprnox0 = COPY $x1
     ; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 0 /* e8 */
     ; CHECK-NEXT: $v0 = COPY %mask
@@ -135,7 +135,7 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %false:vrnov0 = COPY $v8
     ; CHECK-NEXT: %mask:vmv0 = COPY $v0
-    ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 1 /* ta, mu */
+    ; CHECK-NEXT: %true:vmnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 1 /* ta, mu */
     %false:vrnov0 = COPY $v8
     %mask:vmv0 = COPY $v0
     %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload-vpstore-output.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload-vpstore-output.ll
index f087efcc5f57b..d3649ef4b6664 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload-vpstore-output.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload-vpstore-output.ll
@@ -15,7 +15,7 @@ define <vscale x 1 x i8> @strided_vpload_nxv1i8_i8(ptr %ptr, i8 signext %stride,
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr = COPY $x11
   ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:gpr = COPY $x10
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vmv0 = COPY [[COPY1]]
-  ; CHECK-NEXT:   [[PseudoVLSE8_V_MF8_MASK:%[0-9]+]]:vrnov0 = PseudoVLSE8_V_MF8_MASK $noreg, [[COPY3]], [[COPY2]], [[COPY4]], [[COPY]], 3 /* e8 */, 1 /* ta, mu */ :: (load unknown-size, align 1)
+  ; CHECK-NEXT:   [[PseudoVLSE8_V_MF8_MASK:%[0-9]+]]:vmnov0 = PseudoVLSE8_V_MF8_MASK $noreg, [[COPY3]], [[COPY2]], [[COPY4]], [[COPY]], 3 /* e8 */, 1 /* ta, mu */ :: (load unknown-size, align 1)
   ; CHECK-NEXT:   $v8 = COPY [[PseudoVLSE8_V_MF8_MASK]]
   ; CHECK-NEXT:   PseudoRET implicit $v8
   %load = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr %ptr, i8 %stride, <vscale x 1 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll b/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll
index 6b6276b838fba..7cbaceae858b3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll
@@ -42,9 +42,9 @@ define i64 @test_vleff_nxv8i8_mask(<vscale x 8 x i8> %maskedoff, ptr %p, <vscale
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gprnox0 = COPY $x11
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vr = COPY $v0
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr = COPY $x10
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vmnov0 = COPY $v8
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vmv0 = COPY [[COPY1]]
-  ; CHECK-NEXT:   [[PseudoVLE8FF_V_M1_MASK:%[0-9]+]]:vrnov0, [[PseudoVLE8FF_V_M1_MASK1:%[0-9]+]]:gpr = PseudoVLE8FF_V_M1_MASK [[COPY3]], [[COPY2]], [[COPY4]], [[COPY]], 3 /* e8 */, 0 /* tu, mu */ :: (load unknown-size from %ir.p, align 1)
+  ; CHECK-NEXT:   [[PseudoVLE8FF_V_M1_MASK:%[0-9]+]]:vmnov0, [[PseudoVLE8FF_V_M1_MASK1:%[0-9]+]]:gpr = PseudoVLE8FF_V_M1_MASK [[COPY3]], [[COPY2]], [[COPY4]], [[COPY]], 3 /* e8 */, 0 /* tu, mu */ :: (load unknown-size from %ir.p, align 1)
   ; CHECK-NEXT:   $x10 = COPY [[PseudoVLE8FF_V_M1_MASK1]]
   ; CHECK-NEXT:   PseudoRET implicit $x10
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir
index 81a271bd975e3..bc78a7732c15a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir
@@ -148,8 +148,8 @@ body: |
     ; CHECK-NEXT: %y:vr = COPY $v9
     ; CHECK-NEXT: %mask:vmv0 = COPY $v0
     ; CHECK-NEXT: %add0:vr = PseudoVADD_VV_M1 $noreg, %x, %y, -1, 5 /* e32 */, 3 /* ta, ma */
-    ; CHECK-NEXT: %add1:vrnov0 = COPY %add:vrnov0
-    ; CHECK-NEXT: %merge:vrnov0 = PseudoVOR_VV_M1_MASK %add:vrnov0, %add1, %y, %mask, -1, 5 /* e32 */, 1 /* ta, mu */
+    ; CHECK-NEXT: %add1:vmnov0 = COPY %add:vmnov0
+    ; CHECK-NEXT: %merge:vrnov0 = PseudoVOR_VV_M1_MASK %add:vmnov0, %add1, %y, %mask, -1, 5 /* e32 */, 1 /* ta, mu */
     %x:vr = COPY $v8
     %y:vr = COPY $v9
     %mask:vmv0 = COPY $v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
index 68e74ff6ba05b..3f551ba91b3a7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
@@ -112,7 +112,7 @@ body: |
     ; CHECK-LABEL: name: diff_regclass
     ; CHECK: liveins: $v8
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[PseudoVMV_V_I_MF2_:%[0-9]+]]:vrnov0 = PseudoVMV_V_I_MF2 $noreg, 0, 0, 5 /* e32 */, 1 /* ta, mu */
+    ; CHECK-NEXT: [[PseudoVMV_V_I_MF2_:%[0-9]+]]:vmnov0 = PseudoVMV_V_I_MF2 $noreg, 0, 0, 5 /* e32 */, 1 /* ta, mu */
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vmv0 = COPY $v8
     ; CHECK-NEXT: [[PseudoVADD_VV_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVADD_VV_M1_MASK [[PseudoVMV_V_I_MF2_]], $noreg, $noreg, [[COPY]], 0, 5 /* e32 */, 0 /* tu, mu */
     %0:vr = PseudoVMV_V_I_MF2 $noreg, 0, -1, 5 /* e32 */, 0 /* tu, mu */
@@ -128,7 +128,7 @@ body: |
     ; CHECK-LABEL: name: diff_regclass_passthru
     ; CHECK: liveins: $v8
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[PseudoVMV_V_I_MF2_:%[0-9]+]]:vrnov0 = PseudoVMV_V_I_MF2 $noreg, 0, 0, 5 /* e32 */, 1 /* ta, mu */
+    ; CHECK-NEXT: [[PseudoVMV_V_I_MF2_:%[0-9]+]]:vmnov0 = PseudoVMV_V_I_MF2 $noreg, 0, 0, 5 /* e32 */, 1 /* ta, mu */
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vmv0 = COPY $v8
     ; CHECK-NEXT: [[PseudoVLSE32_V_MF2_MASK:%[0-9]+]]:vrnov0 = PseudoVLSE32_V_MF2_MASK [[PseudoVMV_V_I_MF2_]], $noreg, $noreg, [[COPY]], 0, 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size, align 4)
     %2:vr = PseudoVMV_V_I_MF2 $noreg, 0, -1, 5 /* e32 */, 0 /* tu, mu */
@@ -162,7 +162,7 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %passthru:vrnov0 = COPY $v8
     ; CHECK-NEXT: %mask:vmv0 = COPY $v0
-    ; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, $noreg, %mask, 4, 5 /* e32 */
+    ; CHECK-NEXT: %x:vmnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, $noreg, %mask, 4, 5 /* e32 */
     %passthru:vrnov0 = COPY $v8
     %mask:vmv0 = COPY $v0
     %x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %passthru, $noreg, %mask, 4, 5 /* e32 */
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
index a35100654432c..0b242abeb035c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
@@ -793,7 +793,7 @@ body:             |
   ; CHECK-NEXT:   %idxs:vr = COPY $v0
   ; CHECK-NEXT:   %t1:vr = COPY $v1
   ; CHECK-NEXT:   %t3:vr = COPY $v2
-  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vrnov0 = COPY $v3
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vmnov0 = COPY $v3
   ; CHECK-NEXT:   %t5:vrnov0 = COPY $v1
   ; CHECK-NEXT:   dead [[PseudoVSETVLIX0_:%[0-9]+]]:gprnox0 = PseudoVSETVLIX0 killed $x0, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
   ; CHECK-NEXT:   %t6:vr = PseudoVMSEQ_VI_M1 %t1, 0, -1, 6 /* e64 */, implicit $vl, implicit $vtype
@@ -811,7 +811,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $v0 = COPY %mask
   ; CHECK-NEXT:   dead $x0 = PseudoVSETVLIX0X0 killed $x0, 69 /* e8, mf8, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
-  ; CHECK-NEXT:   early-clobber [[COPY]]:vrnov0 = PseudoVLUXEI64_V_M1_MF8_MASK %t5, %inaddr, %idxs, $v0, -1, 3 /* e8 */, 1 /* ta, mu */, implicit $vl, implicit $vtype
+  ; CHECK-NEXT:   early-clobber [[COPY]]:vmnov0 = PseudoVLUXEI64_V_M1_MF8_MASK %t5, %inaddr, %idxs, $v0, -1, 3 /* e8 */, 1 /* ta, mu */, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   PseudoBR %bb.3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3:

>From 7684b0a96e64a9feb736303e56f43eaed91b69cb Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 8 Dec 2025 15:53:46 -0800
Subject: [PATCH 2/3] [RISCV] Use VM and VMNoV0 for "vr" and "vd" inline asm
 constraints with mask type.

The inline assembly handling in SelectionDAG uses the first type
for the register class as the type at the input/output of the
inlineassembly. If this isn't the type for the surrounding DAG,
it needs to be converted.

nxv8i8 is the first type for the VR and VRNoV0 register classes.
So we currently generate insert/extract_subvector and bitcasts to
convert to/from nxv8i8.

I believe some of the special casing we have for this in
splitValueIntoRegisterParts and joinRegisterPartsIntoValue is causing
us to also generate incorrect handling for arguments with nxv16i4 types
that should be any extended to nxv16i8. Instead we widen them to nxv32i4
and bitcast to nxv16i8.

This patch uses VM and VMNoV0 for masks which has nxv64i1 as their
first type. This means we will only emit an insert/extract_subvector
without any bitcasts. This will allow me to add additional type checks to
splitValueIntoRegisterParts and joinRegisterPartsIntoValue to fix the
nxv16i4 argument issue without breaking inline assembly.

I may need to add more register classes to cover fractional LMULs,
but I'm not sure yet.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 32 +++++++++++----------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index eaf908886740c..b0fe7028f0cea 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -24371,14 +24371,15 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       break;
     }
   } else if (Constraint == "vr") {
+    // Check VM first so that mask types will use that instead of VR.
     for (const auto *RC :
-         {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
-          &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
-          &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
-          &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
-          &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
-          &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
-          &RISCV::VRN2M4RegClass}) {
+         {&RISCV::VMRegClass, &RISCV::VRRegClass, &RISCV::VRM2RegClass,
+          &RISCV::VRM4RegClass, &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass,
+          &RISCV::VRN3M1RegClass, &RISCV::VRN4M1RegClass,
+          &RISCV::VRN5M1RegClass, &RISCV::VRN6M1RegClass,
+          &RISCV::VRN7M1RegClass, &RISCV::VRN8M1RegClass,
+          &RISCV::VRN2M2RegClass, &RISCV::VRN3M2RegClass,
+          &RISCV::VRN4M2RegClass, &RISCV::VRN2M4RegClass}) {
       if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
         return std::make_pair(0U, RC);
 
@@ -24389,15 +24390,16 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       }
     }
   } else if (Constraint == "vd") {
+    // Check VMNoV0 first so that mask types will use that instead of VRNoV0.
     for (const auto *RC :
-         {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
-          &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
-          &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
-          &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
-          &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
-          &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
-          &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
-          &RISCV::VRN2M4NoV0RegClass}) {
+         {&RISCV::VMNoV0RegClass, &RISCV::VRNoV0RegClass,
+          &RISCV::VRM2NoV0RegClass, &RISCV::VRM4NoV0RegClass,
+          &RISCV::VRM8NoV0RegClass, &RISCV::VRN2M1NoV0RegClass,
+          &RISCV::VRN3M1NoV0RegClass, &RISCV::VRN4M1NoV0RegClass,
+          &RISCV::VRN5M1NoV0RegClass, &RISCV::VRN6M1NoV0RegClass,
+          &RISCV::VRN7M1NoV0RegClass, &RISCV::VRN8M1NoV0RegClass,
+          &RISCV::VRN2M2NoV0RegClass, &RISCV::VRN3M2NoV0RegClass,
+          &RISCV::VRN4M2NoV0RegClass, &RISCV::VRN2M4NoV0RegClass}) {
       if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
         return std::make_pair(0U, RC);
 

>From 8e8f9f8e635d50282aa9037c43fe987e44b837a7 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 8 Dec 2025 17:31:46 -0800
Subject: [PATCH 3/3] [RISCV] Disable
 splitValueIntoRegisterParts/joinRegisterPartsIntoValue for scalable vectors
 with same element count.

This code was incorrectly handling v16i4->v16i8 which should be
an any_extend to match type legalization. Instead we widened
v16i4 -> v32i4 then bit casted to v16i8. This merged pairs of i4
elements into an i8 element instead of keeping them as separate
elements that have been extended to i8.

I think the scalable vector handling in these functions is a hack
for inline assembly of fractional LMUL types. I'm not sure there
aren't more bugs here. The fundamental issue is that inline assembly
uses the first type from the register class as the PartVT. For LMUL=1
and fractional LMUL types we use the VR register class which has
nxv8i8 as the first type. So we have to convert all fractional types
to nxv8i8, but we don't know we're handling inline assembly. If it's
an argument or return, widening and bitcasting may not be correct.

Fixes the first issue in #171141.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |  8 ++++++--
 llvm/test/CodeGen/RISCV/rvv/pr171141.ll     | 12 ++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/pr171141.ll

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b0fe7028f0cea..30d884aafa264 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -25278,7 +25278,9 @@ bool RISCVTargetLowering::splitValueIntoRegisterParts(
   }
 
   if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
-      PartVT.isScalableVector()) {
+      PartVT.isScalableVector() &&
+      !(ValueVT.isScalableVector() &&
+        ValueVT.getVectorElementCount() == PartVT.getVectorElementCount())) {
     if (ValueVT.isFixedLengthVector()) {
       ValueVT = getContainerForFixedLengthVector(ValueVT.getSimpleVT());
       Val = convertToScalableVector(ValueVT, Val, DAG, Subtarget);
@@ -25353,7 +25355,9 @@ SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
   }
 
   if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
-      PartVT.isScalableVector()) {
+      PartVT.isScalableVector() &&
+      !(ValueVT.isScalableVector() &&
+        ValueVT.getVectorElementCount() == PartVT.getVectorElementCount())) {
     LLVMContext &Context = *DAG.getContext();
     SDValue Val = Parts[0];
     EVT ValueEltVT = ValueVT.getVectorElementType();
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr171141.ll b/llvm/test/CodeGen/RISCV/rvv/pr171141.ll
new file mode 100644
index 0000000000000..5a0cb70d18b9a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/pr171141.ll
@@ -0,0 +1,12 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
+
+define <vscale x 16 x i4> @foo() {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    ret
+entry:
+  ret <vscale x 16 x i4> zeroinitializer
+}