[llvm] [RISCV][GISEL] Add support for scalable vector types in lowerReturnVal (PR #71587)

Tue Nov 7 12:56:58 PST 2023

llvmbot wrote:



@llvm/pr-subscribers-backend-risc-v

@llvm/pr-subscribers-llvm-globalisel

Author: Michael Maitland (michaelmaitland)

<details>
<summary>Changes</summary>

Scalable vector types from LLVM IR are lowered into physical vector registers in MIR based on calling convention for return instructions.

---

Patch is 36.39 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/71587.diff


8 Files Affected:

- (modified) llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp (+1-1) 
- (modified) llvm/lib/CodeGen/LowLevelType.cpp (+1-1) 
- (modified) llvm/lib/CodeGen/MachineVerifier.cpp (+11) 
- (modified) llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp (+25) 
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+5-2) 
- (added) llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vec-ret-bf16-err.ll (+14) 
- (added) llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vec-ret-f16-err.ll (+14) 
- (added) llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vec-ret.ll (+809) 


``````````diff

diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 178f1fc9313d71b..d7ea25838058e4b 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -359,7 +359,7 @@ bool IRTranslator::translateCompare(const User &U,
 bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
   const ReturnInst &RI = cast<ReturnInst>(U);
   const Value *Ret = RI.getReturnValue();
-  if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
+  if (Ret && DL->getTypeStoreSize(Ret->getType()).isZero())
     Ret = nullptr;
 
   ArrayRef<Register> VRegs;
diff --git a/llvm/lib/CodeGen/LowLevelType.cpp b/llvm/lib/CodeGen/LowLevelType.cpp
index 24c30b756737b20..cd85bf606989f9e 100644
--- a/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/llvm/lib/CodeGen/LowLevelType.cpp
@@ -17,7 +17,7 @@ using namespace llvm;
 
 LLT::LLT(MVT VT) {
   if (VT.isVector()) {
-    bool asVector = VT.getVectorMinNumElements() > 1;
+    bool asVector = VT.getVectorMinNumElements() > 1 || VT.isScalableVector();
     init(/*IsPointer=*/false, asVector, /*IsScalar=*/!asVector,
          VT.getVectorElementCount(), VT.getVectorElementType().getSizeInBits(),
          /*AddressSpace=*/0);
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index dc15f0d3b842304..72aacdf12e6e8d2 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1953,6 +1953,14 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
         DstSize = TRI->getRegSizeInBits(*DstRC);
     }
 
+    // The next two checks allow COPY between physical and virtual registers,
+    // when the virtual register has a scalable size and the physical register
+    // has a fixed size. These checks allow COPY between *potentialy* mismatched
+    // sizes. However, once RegisterBankSelection occurs, MachineVerifier should
+    // be able to resolve a fixed size for the scalable vector, and at that
+    // point this function will know for sure whether the sizes are mismatched
+    // and correctly report a size mismatch.
+
     // If this is a copy from physical register to virtual register, and if the
     // Dst is scalable and the Src is fixed, then the Dst can only hold the Src
     // if the minimum size Dst can hold is at least as big as Src.
@@ -1960,6 +1968,9 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
         !SrcSize.isScalable() &&
         DstSize.getKnownMinValue() <= SrcSize.getFixedValue())
       break;
+    if (SrcReg.isVirtual() && DstReg.isPhysical() && SrcSize.isScalable() &&
+        !DstSize.isScalable())
+      break;
 
     if (SrcSize.isNonZero() && DstSize.isNonZero() && SrcSize != DstSize) {
       if (!DstOp.getSubReg() && !SrcOp.getSubReg()) {
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index a1dbc21ca364666..1edcb34539a49d9 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -301,6 +301,27 @@ struct RISCVCallReturnHandler : public RISCVIncomingValueHandler {
 RISCVCallLowering::RISCVCallLowering(const RISCVTargetLowering &TLI)
     : CallLowering(&TLI) {}
 
+/// Return true if scalable vector with ScalarTy is legal for lowering.
+static bool isLegalElementTypeForRVV(Type *EltTy,
+                                     const RISCVSubtarget &Subtarget) {
+  if (EltTy->isPointerTy())
+    return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
+  if (EltTy->isIntegerTy(1) || EltTy->isIntegerTy(8) ||
+      EltTy->isIntegerTy(16) || EltTy->isIntegerTy(32))
+    return true;
+  if (EltTy->isIntegerTy(64))
+    return Subtarget.hasVInstructionsI64();
+  if (EltTy->isHalfTy())
+    return Subtarget.hasVInstructionsF16();
+  if (EltTy->isBFloatTy())
+    return Subtarget.hasVInstructionsBF16();
+  if (EltTy->isFloatTy())
+    return Subtarget.hasVInstructionsF32();
+  if (EltTy->isDoubleTy())
+    return Subtarget.hasVInstructionsF64();
+  return false;
+}
+
 // TODO: Support all argument types.
 static bool isSupportedArgumentType(Type *T, const RISCVSubtarget &Subtarget) {
   // TODO: Integers larger than 2*XLen are passed indirectly which is not
@@ -336,6 +357,10 @@ static bool isSupportedReturnType(Type *T, const RISCVSubtarget &Subtarget) {
     return true;
   }
 
+  if (T->isVectorTy() && Subtarget.hasVInstructions() && T->isScalableTy() &&
+      isLegalElementTypeForRVV(T->getScalarType(), Subtarget))
+    return true;
+
   return false;
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 26475d94aef02e3..d82d41ab95e6ff7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -19600,12 +19600,15 @@ unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
 }
 
 bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
-  // We don't support scalable vectors in GISel.
+  // At the moment, the only scalable instruction GISel knows how to lower is
+  // ret with scalable argument.
+
   if (Inst.getType()->isScalableTy())
     return true;
 
   for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
-    if (Inst.getOperand(i)->getType()->isScalableTy())
+    if (Inst.getOperand(i)->getType()->isScalableTy() &&
+        !isa<ReturnInst>(&Inst))
       return true;
 
   if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vec-ret-bf16-err.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vec-ret-bf16-err.ll
new file mode 100644
index 000000000000000..c968d0726317f19
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vec-ret-bf16-err.ll
@@ -0,0 +1,14 @@
+; RUN: not --crash llc -mtriple=riscv32 -mattr=+v -global-isel -stop-after=irtranslator \
+; RUN:   -verify-machineinstrs < %s 2>&1 | FileCheck %s
+; RUN: not --crash llc -mtriple=riscv64 -mattr=+v -global-isel -stop-after=irtranslator \
+; RUN:   -verify-machineinstrs < %s 2>&1 | FileCheck %s
+
+; The purpose of this test is to show that the compiler throws an error when
+; there is no support for bf16 vectors. If the compiler did not throw an error,
+; then it will try to scalarize the argument to an s32, which may drop elements.
+define <vscale x 1 x bfloat> @test_ret_nxv1bf16() {
+entry:
+  ret <vscale x 1 x bfloat> undef
+}
+
+; CHECK: LLVM ERROR: unable to translate instruction: ret (in function: test_ret_nxv1bf16)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vec-ret-f16-err.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vec-ret-f16-err.ll
new file mode 100644
index 000000000000000..f87ca94ceb4f103
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vec-ret-f16-err.ll
@@ -0,0 +1,14 @@
+; RUN: not --crash llc -mtriple=riscv32 -mattr=+v -global-isel -stop-after=irtranslator \
+; RUN:   -verify-machineinstrs < %s 2>&1 | FileCheck %s
+; RUN: not --crash llc -mtriple=riscv64 -mattr=+v -global-isel -stop-after=irtranslator \
+; RUN:   -verify-machineinstrs < %s 2>&1 | FileCheck %s
+
+; The purpose of this test is to show that the compiler throws an error when
+; there is no support for f16 vectors. If the compiler did not throw an error,
+; then it will try to scalarize the argument to an s32, which may drop elements.
+define <vscale x 1 x half> @test_ret_nxv1f16() {
+entry:
+  ret <vscale x 1 x half> undef
+}
+
+; CHECK: LLVM ERROR: unable to translate instruction: ret (in function: test_ret_nxv1f16)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vec-ret.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vec-ret.ll
new file mode 100644
index 000000000000000..eec9969063c87a5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vec-ret.ll
@@ -0,0 +1,809 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfmin,+zvfh -global-isel -stop-after=irtranslator \
+; RUN:   -verify-machineinstrs < %s | FileCheck -check-prefixes=RV32 %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfmin,+zvfh -global-isel -stop-after=irtranslator \
+; RUN:   -verify-machineinstrs < %s | FileCheck -check-prefixes=RV64 %s
+
+; ==========================================================================
+; ============================= Scalable Types =============================
+; ==========================================================================
+
+define <vscale x 1 x i8> @test_ret_nxv1i8() {
+  ; RV32-LABEL: name: test_ret_nxv1i8
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8 = COPY [[DEF]](<vscale x 1 x s8>)
+  ; RV32-NEXT:   PseudoRET implicit $v8
+  ;
+  ; RV64-LABEL: name: test_ret_nxv1i8
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8 = COPY [[DEF]](<vscale x 1 x s8>)
+  ; RV64-NEXT:   PseudoRET implicit $v8
+entry:
+  ret <vscale x 1 x i8> undef
+}
+
+define <vscale x 2 x i8> @test_ret_nxv2i8() {
+  ; RV32-LABEL: name: test_ret_nxv2i8
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8 = COPY [[DEF]](<vscale x 2 x s8>)
+  ; RV32-NEXT:   PseudoRET implicit $v8
+  ;
+  ; RV64-LABEL: name: test_ret_nxv2i8
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8 = COPY [[DEF]](<vscale x 2 x s8>)
+  ; RV64-NEXT:   PseudoRET implicit $v8
+entry:
+  ret <vscale x 2 x i8> undef
+}
+
+define <vscale x 4 x i8> @test_ret_nxv4i8() {
+  ; RV32-LABEL: name: test_ret_nxv4i8
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8 = COPY [[DEF]](<vscale x 4 x s8>)
+  ; RV32-NEXT:   PseudoRET implicit $v8
+  ;
+  ; RV64-LABEL: name: test_ret_nxv4i8
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8 = COPY [[DEF]](<vscale x 4 x s8>)
+  ; RV64-NEXT:   PseudoRET implicit $v8
+entry:
+  ret <vscale x 4 x i8> undef
+}
+
+define <vscale x 8 x i8> @test_ret_nxv8i8() {
+  ; RV32-LABEL: name: test_ret_nxv8i8
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8 = COPY [[DEF]](<vscale x 8 x s8>)
+  ; RV32-NEXT:   PseudoRET implicit $v8
+  ;
+  ; RV64-LABEL: name: test_ret_nxv8i8
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8 = COPY [[DEF]](<vscale x 8 x s8>)
+  ; RV64-NEXT:   PseudoRET implicit $v8
+entry:
+  ret <vscale x 8 x i8> undef
+}
+
+define <vscale x 16 x i8> @test_ret_nxv16i8() {
+  ; RV32-LABEL: name: test_ret_nxv16i8
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8m2 = COPY [[DEF]](<vscale x 16 x s8>)
+  ; RV32-NEXT:   PseudoRET implicit $v8m2
+  ;
+  ; RV64-LABEL: name: test_ret_nxv16i8
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8m2 = COPY [[DEF]](<vscale x 16 x s8>)
+  ; RV64-NEXT:   PseudoRET implicit $v8m2
+entry:
+  ret <vscale x 16 x i8> undef
+}
+
+define <vscale x 32 x i8> @test_ret_nxv32i8() {
+  ; RV32-LABEL: name: test_ret_nxv32i8
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 32 x s8>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8m4 = COPY [[DEF]](<vscale x 32 x s8>)
+  ; RV32-NEXT:   PseudoRET implicit $v8m4
+  ;
+  ; RV64-LABEL: name: test_ret_nxv32i8
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 32 x s8>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8m4 = COPY [[DEF]](<vscale x 32 x s8>)
+  ; RV64-NEXT:   PseudoRET implicit $v8m4
+entry:
+  ret <vscale x 32 x i8> undef
+}
+
+define <vscale x 64 x i8> @test_ret_nxv64i8() {
+  ; RV32-LABEL: name: test_ret_nxv64i8
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 64 x s8>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8m8 = COPY [[DEF]](<vscale x 64 x s8>)
+  ; RV32-NEXT:   PseudoRET implicit $v8m8
+  ;
+  ; RV64-LABEL: name: test_ret_nxv64i8
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 64 x s8>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8m8 = COPY [[DEF]](<vscale x 64 x s8>)
+  ; RV64-NEXT:   PseudoRET implicit $v8m8
+entry:
+  ret <vscale x 64 x i8> undef
+}
+
+define <vscale x 1 x i16> @test_ret_nxv1i16() {
+  ; RV32-LABEL: name: test_ret_nxv1i16
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8 = COPY [[DEF]](<vscale x 1 x s16>)
+  ; RV32-NEXT:   PseudoRET implicit $v8
+  ;
+  ; RV64-LABEL: name: test_ret_nxv1i16
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8 = COPY [[DEF]](<vscale x 1 x s16>)
+  ; RV64-NEXT:   PseudoRET implicit $v8
+entry:
+  ret <vscale x 1 x i16> undef
+}
+
+define <vscale x 2 x i16> @test_ret_nxv2i16() {
+  ; RV32-LABEL: name: test_ret_nxv2i16
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8 = COPY [[DEF]](<vscale x 2 x s16>)
+  ; RV32-NEXT:   PseudoRET implicit $v8
+  ;
+  ; RV64-LABEL: name: test_ret_nxv2i16
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8 = COPY [[DEF]](<vscale x 2 x s16>)
+  ; RV64-NEXT:   PseudoRET implicit $v8
+entry:
+  ret <vscale x 2 x i16> undef
+}
+
+define <vscale x 4 x i16> @test_ret_nxv4i16() {
+  ; RV32-LABEL: name: test_ret_nxv4i16
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8 = COPY [[DEF]](<vscale x 4 x s16>)
+  ; RV32-NEXT:   PseudoRET implicit $v8
+  ;
+  ; RV64-LABEL: name: test_ret_nxv4i16
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8 = COPY [[DEF]](<vscale x 4 x s16>)
+  ; RV64-NEXT:   PseudoRET implicit $v8
+entry:
+  ret <vscale x 4 x i16> undef
+}
+
+define <vscale x 8 x i16> @test_ret_nxv8i16() {
+  ; RV32-LABEL: name: test_ret_nxv8i16
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8m2 = COPY [[DEF]](<vscale x 8 x s16>)
+  ; RV32-NEXT:   PseudoRET implicit $v8m2
+  ;
+  ; RV64-LABEL: name: test_ret_nxv8i16
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8m2 = COPY [[DEF]](<vscale x 8 x s16>)
+  ; RV64-NEXT:   PseudoRET implicit $v8m2
+entry:
+  ret <vscale x 8 x i16> undef
+}
+
+define <vscale x 16 x i16> @test_ret_nxv16i16() {
+  ; RV32-LABEL: name: test_ret_nxv16i16
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8m4 = COPY [[DEF]](<vscale x 16 x s16>)
+  ; RV32-NEXT:   PseudoRET implicit $v8m4
+  ;
+  ; RV64-LABEL: name: test_ret_nxv16i16
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8m4 = COPY [[DEF]](<vscale x 16 x s16>)
+  ; RV64-NEXT:   PseudoRET implicit $v8m4
+entry:
+  ret <vscale x 16 x i16> undef
+}
+
+define <vscale x 32 x i16> @test_ret_nxv32i16() {
+  ; RV32-LABEL: name: test_ret_nxv32i16
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 32 x s16>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8m8 = COPY [[DEF]](<vscale x 32 x s16>)
+  ; RV32-NEXT:   PseudoRET implicit $v8m8
+  ;
+  ; RV64-LABEL: name: test_ret_nxv32i16
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 32 x s16>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8m8 = COPY [[DEF]](<vscale x 32 x s16>)
+  ; RV64-NEXT:   PseudoRET implicit $v8m8
+entry:
+  ret <vscale x 32 x i16> undef
+}
+
+define <vscale x 1 x i32> @test_ret_nxv1i32() {
+  ; RV32-LABEL: name: test_ret_nxv1i32
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8 = COPY [[DEF]](<vscale x 1 x s32>)
+  ; RV32-NEXT:   PseudoRET implicit $v8
+  ;
+  ; RV64-LABEL: name: test_ret_nxv1i32
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8 = COPY [[DEF]](<vscale x 1 x s32>)
+  ; RV64-NEXT:   PseudoRET implicit $v8
+entry:
+  ret <vscale x 1 x i32> undef
+}
+
+define <vscale x 2 x i32> @test_ret_nxv2i32() {
+  ; RV32-LABEL: name: test_ret_nxv2i32
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8 = COPY [[DEF]](<vscale x 2 x s32>)
+  ; RV32-NEXT:   PseudoRET implicit $v8
+  ;
+  ; RV64-LABEL: name: test_ret_nxv2i32
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8 = COPY [[DEF]](<vscale x 2 x s32>)
+  ; RV64-NEXT:   PseudoRET implicit $v8
+entry:
+  ret <vscale x 2 x i32> undef
+}
+
+define <vscale x 4 x i32> @test_ret_nxv4i32() {
+  ; RV32-LABEL: name: test_ret_nxv4i32
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8m2 = COPY [[DEF]](<vscale x 4 x s32>)
+  ; RV32-NEXT:   PseudoRET implicit $v8m2
+  ;
+  ; RV64-LABEL: name: test_ret_nxv4i32
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8m2 = COPY [[DEF]](<vscale x 4 x s32>)
+  ; RV64-NEXT:   PseudoRET implicit $v8m2
+entry:
+  ret <vscale x 4 x i32> undef
+}
+
+define <vscale x 8 x i32> @test_ret_nxv8i32() {
+  ; RV32-LABEL: name: test_ret_nxv8i32
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8m4 = COPY [[DEF]](<vscale x 8 x s32>)
+  ; RV32-NEXT:   PseudoRET implicit $v8m4
+  ;
+  ; RV64-LABEL: name: test_ret_nxv8i32
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8m4 = COPY [[DEF]](<vscale x 8 x s32>)
+  ; RV64-NEXT:   PseudoRET implicit $v8m4
+entry:
+  ret <vscale x 8 x i32> undef
+}
+
+define <vscale x 16 x i32> @test_ret_nxv16i32() {
+  ; RV32-LABEL: name: test_ret_nxv16i32
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8m8 = COPY [[DEF]](<vscale x 16 x s32>)
+  ; RV32-NEXT:   PseudoRET implicit $v8m8
+  ;
+  ; RV64-LABEL: name: test_ret_nxv16i32
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8m8 = COPY [[DEF]](<vscale x 16 x s32>)
+  ; RV64-NEXT:   PseudoRET implicit $v8m8
+entry:
+  ret <vscale x 16 x i32> undef
+}
+
+define <vscale x 1 x i64> @test_ret_nxv1i64() {
+  ; RV32-LABEL: name: test_ret_nxv1i64
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8 = COPY [[DEF]](<vscale x 1 x s64>)
+  ; RV32-NEXT:   PseudoRET implicit $v8
+  ;
+  ; RV64-LABEL: name: test_ret_nxv1i64
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8 = COPY [[DEF]](<vscale x 1 x s64>)
+  ; RV64-NEXT:   PseudoRET implicit $v8
+entry:
+  ret <vscale x 1 x i64> undef
+}
+
+define <vscale x 2 x i64> @test_ret_nxv2i64() {
+  ; RV32-LABEL: name: test_ret_nxv2i64
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8m2 = COPY [[DEF]](<vscale x 2 x s64>)
+  ; RV32-NEXT:   PseudoRET implicit $v8m2
+  ;
+  ; RV64-LABEL: name: test_ret_nxv2i64
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+  ; RV64-NEXT:   $v8m2 = COPY [[DEF]](<vscale x 2 x s64>)
+  ; RV64-NEXT:   PseudoRET implicit $v8m2
+entry:
+  ret <vscale x 2 x i64> undef
+}
+
+define <vscale x 4 x i64> @test_ret_nxv4i64() {
+  ; RV32-LABEL: name: test_ret_nxv4i64
+  ; RV32: bb.1.entry:
+  ; RV32-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+  ; RV32-NEXT:   $v8m4 = COPY [[DEF]](<vscale x 4 x s64>)
+  ; RV32-NEXT:   PseudoRET implicit $v8m4
+  ;
+  ; RV64-LABEL: name: test_ret_nxv4i64
+  ; RV64: bb.1.entry:
+  ; RV64-NEXT:   [[DEF:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+  ; RV6...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/71587