[llvm] 2c82080 - [MachineFrameInfo][RISCV] Call ensureStackAlignment for objects created with scalable vector stack id.
Craig Topper via llvm-commits
llvm-commits@lists.llvm.org
Thu Oct 20 14:12:06 PDT 2022
Author: Craig Topper
Date: 2022-10-20T14:05:46-07:00
New Revision: 2c82080f0964c45663d5599505e88f9124fc5f73
URL: https://github.com/llvm/llvm-project/commit/2c82080f0964c45663d5599505e88f9124fc5f73
DIFF: https://github.com/llvm/llvm-project/commit/2c82080f0964c45663d5599505e88f9124fc5f73.diff
LOG: [MachineFrameInfo][RISCV] Call ensureStackAlignment for objects created with scalable vector stack id.
This is an alternative fix for PR57939 on RISC-V. It can certainly
be argued that the stack temporaries for RISC-V are being created
with an unnecessarily large alignment, but ignoring that alignment
in MachineFrameInfo also seems bad.

Looking at the test updates that went in with the current ID==0 check,
the intent appears to have been to exclude things like the NoAlloc
stack ID, so I'm not sure scalable vectors were meant to be excluded.
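
For context, here is a minimal sketch (not part of this patch) of the
behavior being changed. The MachineFrameInfo, TargetStackID, and Align
names are the real ones touched by the diff below; the helper function
itself is hypothetical:

// Illustrative sketch only: createScalableSpill is a hypothetical
// helper, but the MachineFrameInfo calls are the ones this patch touches.
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include <cassert>

using namespace llvm;

void createScalableSpill(MachineFrameInfo &MFI) {
  // Create a 64-byte-aligned scalable vector stack object, as the
  // RISC-V backend does for the vector stack temporaries in the tests
  // updated below.
  (void)MFI.CreateStackObject(/*Size=*/16, Align(64),
                              /*IsSpillSlot=*/false,
                              /*Alloca=*/nullptr,
                              TargetStackID::ScalableVector);
  // Before this patch, only StackID == 0 objects fed ensureMaxAlignment,
  // so getMaxAlign() ignored this object's 64-byte requirement. With
  // contributesToMaxAlignment() also returning true for
  // TargetStackID::ScalableVector, the requirement is recorded and frame
  // lowering knows it must realign sp (hence the extra ra/s0 spills in
  // the updated CHECK lines).
  assert(MFI.getMaxAlign() >= Align(64) && "max alignment now tracked");
}

The assert added in RISCVFrameLowering.cpp below guards the related
invariant on the RISC-V side: once the prologue needs the frame
pointer, that register must actually have been reserved from register
allocation.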
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D135913
Added:
Modified:
llvm/include/llvm/CodeGen/MachineFrameInfo.h
llvm/lib/CodeGen/MachineFrameInfo.cpp
llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll
llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll
llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 7ea731b466559..7d11d63d4066f 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -15,6 +15,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Support/Alignment.h"
#include <cassert>
#include <vector>
@@ -486,14 +487,21 @@ class MachineFrameInfo {
return Objects[ObjectIdx + NumFixedObjects].Alignment;
}
+ /// Should this stack ID be considered in MaxAlignment.
+ bool contributesToMaxAlignment(uint8_t StackID) {
+ return StackID == TargetStackID::Default ||
+ StackID == TargetStackID::ScalableVector;
+ }
+
/// setObjectAlignment - Change the alignment of the specified stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment) {
assert(unsigned(ObjectIdx + NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
Objects[ObjectIdx + NumFixedObjects].Alignment = Alignment;
- // Only ensure max alignment for the default stack.
- if (getStackID(ObjectIdx) == 0)
+ // Only ensure max alignment for the default and scalable vector stack.
+ uint8_t StackID = getStackID(ObjectIdx);
+ if (contributesToMaxAlignment(StackID))
ensureMaxAlignment(Alignment);
}
diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp
index f0190812389f9..daf6a218165d9 100644
--- a/llvm/lib/CodeGen/MachineFrameInfo.cpp
+++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -58,7 +58,7 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, Align Alignment,
!IsSpillSlot, StackID));
int Index = (int)Objects.size() - NumFixedObjects - 1;
assert(Index >= 0 && "Bad frame index!");
- if (StackID == 0)
+ if (contributesToMaxAlignment(StackID))
ensureMaxAlignment(Alignment);
return Index;
}
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 0dc3d29c68596..b7aefe12a9cb3 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -515,6 +515,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
if (STI.isRegisterReservedByUser(FPReg))
MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
MF.getFunction(), "Frame pointer required, but has been reserved."});
+ // The frame pointer does need to be reserved from register allocation.
+ assert(MF.getRegInfo().isReserved(FPReg) && "FP not reserved");
adjustReg(MBB, MBBI, DL, FPReg, SPReg,
RealStackSize - RVFI->getVarArgsSaveSize(),
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
index 4068f141e179a..9bc394d24849d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
@@ -289,7 +289,9 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
; RV32-NEXT: addi sp, sp, -144
; RV32-NEXT: .cfi_def_cfa_offset 144
; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: addi s0, sp, 144
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: csrr a1, vlenb
@@ -312,6 +314,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
; RV32-NEXT: call ext2@plt
; RV32-NEXT: addi sp, s0, -144
; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 144
; RV32-NEXT: ret
;
@@ -320,7 +323,9 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
; RV64-NEXT: addi sp, sp, -144
; RV64-NEXT: .cfi_def_cfa_offset 144
; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: addi s0, sp, 144
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: csrr a1, vlenb
@@ -343,6 +348,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
; RV64-NEXT: call ext2@plt
; RV64-NEXT: addi sp, s0, -144
; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 144
; RV64-NEXT: ret
%t = call fastcc <vscale x 32 x i32> @ext2(<vscale x 32 x i32> %y, <vscale x 32 x i32> %x, i32 %w, i32 2)
@@ -355,7 +361,9 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV32-NEXT: addi sp, sp, -144
; RV32-NEXT: .cfi_def_cfa_offset 144
; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: addi s0, sp, 144
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: csrr a1, vlenb
@@ -412,6 +420,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV32-NEXT: call ext3@plt
; RV32-NEXT: addi sp, s0, -144
; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 144
; RV32-NEXT: ret
;
@@ -420,7 +429,9 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV64-NEXT: addi sp, sp, -144
; RV64-NEXT: .cfi_def_cfa_offset 144
; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: addi s0, sp, 144
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: csrr a1, vlenb
@@ -477,6 +488,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV64-NEXT: call ext3@plt
; RV64-NEXT: addi sp, s0, -144
; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 144
; RV64-NEXT: ret
%t = call fastcc <vscale x 32 x i32> @ext3(<vscale x 32 x i32> %z, <vscale x 32 x i32> %y, <vscale x 32 x i32> %x, i32 %w, i32 42)
@@ -509,7 +521,9 @@ define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x
; RV32-NEXT: addi sp, sp, -144
; RV32-NEXT: .cfi_def_cfa_offset 144
; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: addi s0, sp, 144
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: csrr a0, vlenb
@@ -549,6 +563,7 @@ define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x
; RV32-NEXT: call vector_arg_indirect_stack@plt
; RV32-NEXT: addi sp, s0, -144
; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 144
; RV32-NEXT: ret
;
@@ -557,7 +572,9 @@ define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x
; RV64-NEXT: addi sp, sp, -144
; RV64-NEXT: .cfi_def_cfa_offset 144
; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: addi s0, sp, 144
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: csrr a0, vlenb
@@ -597,6 +614,7 @@ define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x
; RV64-NEXT: call vector_arg_indirect_stack@plt
; RV64-NEXT: addi sp, s0, -144
; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 144
; RV64-NEXT: ret
%s = call fastcc <vscale x 32 x i32> @vector_arg_indirect_stack(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, <vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, i32 8)
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
index e0f245bf40040..a9f5f85709507 100644
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
@@ -26,7 +26,9 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
; RV32-NEXT: addi sp, sp, -144
; RV32-NEXT: .cfi_def_cfa_offset 144
; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: addi s0, sp, 144
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: csrr a0, vlenb
@@ -46,6 +48,7 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
; RV32-NEXT: call callee_scalable_vector_split_indirect@plt
; RV32-NEXT: addi sp, s0, -144
; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 144
; RV32-NEXT: ret
;
@@ -54,7 +57,9 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
; RV64-NEXT: addi sp, sp, -144
; RV64-NEXT: .cfi_def_cfa_offset 144
; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: addi s0, sp, 144
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: csrr a0, vlenb
@@ -74,6 +79,7 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
; RV64-NEXT: call callee_scalable_vector_split_indirect@plt
; RV64-NEXT: addi sp, s0, -144
; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 144
; RV64-NEXT: ret
%c = alloca i64
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll
index 92959576c5127..39a0d5a323a2f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll
@@ -593,9 +593,13 @@ define double @extractelt_nxv16f64_0(<vscale x 16 x double> %v) {
define double @extractelt_nxv16f64_neg1(<vscale x 16 x double> %v) {
; RV32-LABEL: extractelt_nxv16f64_neg1:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -64
-; RV32-NEXT: .cfi_def_cfa_offset 64
-; RV32-NEXT: addi s0, sp, 64
+; RV32-NEXT: addi sp, sp, -80
+; RV32-NEXT: .cfi_def_cfa_offset 80
+; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 80
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
@@ -610,15 +614,21 @@ define double @extractelt_nxv16f64_neg1(<vscale x 16 x double> %v) {
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: fld fa0, -8(a0)
-; RV32-NEXT: addi sp, s0, -64
-; RV32-NEXT: addi sp, sp, 64
+; RV32-NEXT: addi sp, s0, -80
+; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 80
; RV32-NEXT: ret
;
; RV64-LABEL: extractelt_nxv16f64_neg1:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -64
-; RV64-NEXT: .cfi_def_cfa_offset 64
-; RV64-NEXT: addi s0, sp, 64
+; RV64-NEXT: addi sp, sp, -80
+; RV64-NEXT: .cfi_def_cfa_offset 80
+; RV64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 80
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
@@ -641,8 +651,10 @@ define double @extractelt_nxv16f64_neg1(<vscale x 16 x double> %v) {
; RV64-NEXT: slli a1, a2, 3
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: fld fa0, 0(a0)
-; RV64-NEXT: addi sp, s0, -64
-; RV64-NEXT: addi sp, sp, 64
+; RV64-NEXT: addi sp, s0, -80
+; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 80
; RV64-NEXT: ret
%r = extractelement <vscale x 16 x double> %v, i32 -1
ret double %r
@@ -660,34 +672,75 @@ define double @extractelt_nxv16f64_imm(<vscale x 16 x double> %v) {
}
define double @extractelt_nxv16f64_idx(<vscale x 16 x double> %v, i32 zeroext %idx) {
-; CHECK-LABEL: extractelt_nxv16f64_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: addi a2, a2, -1
-; CHECK-NEXT: bltu a0, a2, .LBB54_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a0, a2
-; CHECK-NEXT: .LBB54_2:
-; CHECK-NEXT: addi sp, sp, -64
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: addi s0, sp, 64
-; CHECK-NEXT: .cfi_def_cfa s0, 0
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 4
-; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: andi sp, sp, -64
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: addi a2, sp, 64
-; CHECK-NEXT: add a0, a2, a0
-; CHECK-NEXT: vs8r.v v8, (a2)
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, a2, a1
-; CHECK-NEXT: vs8r.v v16, (a1)
-; CHECK-NEXT: fld fa0, 0(a0)
-; CHECK-NEXT: addi sp, s0, -64
-; CHECK-NEXT: addi sp, sp, 64
-; CHECK-NEXT: ret
+; RV32-LABEL: extractelt_nxv16f64_idx:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: bltu a0, a2, .LBB54_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, a2
+; RV32-NEXT: .LBB54_2:
+; RV32-NEXT: addi sp, sp, -80
+; RV32-NEXT: .cfi_def_cfa_offset 80
+; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 80
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 4
+; RV32-NEXT: sub sp, sp, a2
+; RV32-NEXT: andi sp, sp, -64
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: addi a2, sp, 64
+; RV32-NEXT: add a0, a2, a0
+; RV32-NEXT: vs8r.v v8, (a2)
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: vs8r.v v16, (a1)
+; RV32-NEXT: fld fa0, 0(a0)
+; RV32-NEXT: addi sp, s0, -80
+; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 80
+; RV32-NEXT: ret
+;
+; RV64-LABEL: extractelt_nxv16f64_idx:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: bltu a0, a2, .LBB54_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a0, a2
+; RV64-NEXT: .LBB54_2:
+; RV64-NEXT: addi sp, sp, -80
+; RV64-NEXT: .cfi_def_cfa_offset 80
+; RV64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 80
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 4
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: andi sp, sp, -64
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: addi a2, sp, 64
+; RV64-NEXT: add a0, a2, a0
+; RV64-NEXT: vs8r.v v8, (a2)
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: vs8r.v v16, (a1)
+; RV64-NEXT: fld fa0, 0(a0)
+; RV64-NEXT: addi sp, s0, -80
+; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 80
+; RV64-NEXT: ret
%r = extractelement <vscale x 16 x double> %v, i32 %idx
ret double %r
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
index 253f827d3258f..e827619e11c06 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
define i1 @extractelt_nxv1i1(<vscale x 1 x i8>* %x, i64 %idx) nounwind {
; CHECK-LABEL: extractelt_nxv1i1:
@@ -129,41 +129,85 @@ define i1 @extractelt_nxv64i1(<vscale x 64 x i8>* %x, i64 %idx) nounwind {
}
define i1 @extractelt_nxv128i1(<vscale x 128 x i8>* %x, i64 %idx) nounwind {
-; CHECK-LABEL: extractelt_nxv128i1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a3, a2, 4
-; CHECK-NEXT: addi a3, a3, -1
-; CHECK-NEXT: bltu a1, a3, .LBB7_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a3
-; CHECK-NEXT: .LBB7_2:
-; CHECK-NEXT: addi sp, sp, -64
-; CHECK-NEXT: addi s0, sp, 64
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: slli a3, a3, 4
-; CHECK-NEXT: sub sp, sp, a3
-; CHECK-NEXT: andi sp, sp, -64
-; CHECK-NEXT: addi a3, sp, 64
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a4, a0, a2
-; CHECK-NEXT: vl8r.v v16, (a4)
-; CHECK-NEXT: vl8r.v v24, (a0)
-; CHECK-NEXT: add a0, a3, a1
-; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma
-; CHECK-NEXT: vmseq.vi v8, v16, 0
-; CHECK-NEXT: vmseq.vi v0, v24, 0
-; CHECK-NEXT: vmv.v.i v16, 0
-; CHECK-NEXT: vmerge.vim v24, v16, 1, v0
-; CHECK-NEXT: vs8r.v v24, (a3)
-; CHECK-NEXT: add a1, a3, a2
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v16, 1, v0
-; CHECK-NEXT: vs8r.v v8, (a1)
-; CHECK-NEXT: lb a0, 0(a0)
-; CHECK-NEXT: addi sp, s0, -64
-; CHECK-NEXT: addi sp, sp, 64
-; CHECK-NEXT: ret
+; RV32-LABEL: extractelt_nxv128i1:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a3, a2, 4
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: bltu a1, a3, .LBB7_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, a3
+; RV32-NEXT: .LBB7_2:
+; RV32-NEXT: addi sp, sp, -80
+; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; RV32-NEXT: addi s0, sp, 80
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 4
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: andi sp, sp, -64
+; RV32-NEXT: addi a3, sp, 64
+; RV32-NEXT: slli a2, a2, 3
+; RV32-NEXT: add a4, a0, a2
+; RV32-NEXT: vl8r.v v16, (a4)
+; RV32-NEXT: vl8r.v v24, (a0)
+; RV32-NEXT: add a0, a3, a1
+; RV32-NEXT: vsetvli a1, zero, e8, m8, ta, ma
+; RV32-NEXT: vmseq.vi v8, v16, 0
+; RV32-NEXT: vmseq.vi v0, v24, 0
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vmerge.vim v24, v16, 1, v0
+; RV32-NEXT: vs8r.v v24, (a3)
+; RV32-NEXT: add a1, a3, a2
+; RV32-NEXT: vmv1r.v v0, v8
+; RV32-NEXT: vmerge.vim v8, v16, 1, v0
+; RV32-NEXT: vs8r.v v8, (a1)
+; RV32-NEXT: lb a0, 0(a0)
+; RV32-NEXT: addi sp, s0, -80
+; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 80
+; RV32-NEXT: ret
+;
+; RV64-LABEL: extractelt_nxv128i1:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 4
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: bltu a1, a3, .LBB7_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, a3
+; RV64-NEXT: .LBB7_2:
+; RV64-NEXT: addi sp, sp, -80
+; RV64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT: addi s0, sp, 80
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: slli a3, a3, 4
+; RV64-NEXT: sub sp, sp, a3
+; RV64-NEXT: andi sp, sp, -64
+; RV64-NEXT: addi a3, sp, 64
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: add a4, a0, a2
+; RV64-NEXT: vl8r.v v16, (a4)
+; RV64-NEXT: vl8r.v v24, (a0)
+; RV64-NEXT: add a0, a3, a1
+; RV64-NEXT: vsetvli a1, zero, e8, m8, ta, ma
+; RV64-NEXT: vmseq.vi v8, v16, 0
+; RV64-NEXT: vmseq.vi v0, v24, 0
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vmerge.vim v24, v16, 1, v0
+; RV64-NEXT: vs8r.v v24, (a3)
+; RV64-NEXT: add a1, a3, a2
+; RV64-NEXT: vmv1r.v v0, v8
+; RV64-NEXT: vmerge.vim v8, v16, 1, v0
+; RV64-NEXT: vs8r.v v8, (a1)
+; RV64-NEXT: lb a0, 0(a0)
+; RV64-NEXT: addi sp, s0, -80
+; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 80
+; RV64-NEXT: ret
%a = load <vscale x 128 x i8>, <vscale x 128 x i8>* %x
%b = icmp eq <vscale x 128 x i8> %a, zeroinitializer
%c = extractelement <vscale x 128 x i1> %b, i64 %idx
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll
index 7686e0f8afdd9..d4280f6797284 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll
@@ -873,9 +873,13 @@ define i32 @extractelt_nxv32i32_0(<vscale x 32 x i32> %v) {
define i32 @extractelt_nxv32i32_neg1(<vscale x 32 x i32> %v) {
; CHECK-LABEL: extractelt_nxv32i32_neg1:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -64
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: addi s0, sp, 64
+; CHECK-NEXT: addi sp, sp, -80
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -4
+; CHECK-NEXT: .cfi_offset s0, -8
+; CHECK-NEXT: addi s0, sp, 80
; CHECK-NEXT: .cfi_def_cfa s0, 0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
@@ -890,8 +894,10 @@ define i32 @extractelt_nxv32i32_neg1(<vscale x 32 x i32> %v) {
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a0, a1, a0
; CHECK-NEXT: lw a0, -4(a0)
-; CHECK-NEXT: addi sp, s0, -64
-; CHECK-NEXT: addi sp, sp, 64
+; CHECK-NEXT: addi sp, s0, -80
+; CHECK-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; CHECK-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 80
; CHECK-NEXT: ret
%r = extractelement <vscale x 32 x i32> %v, i32 -1
ret i32 %r
@@ -918,9 +924,13 @@ define i32 @extractelt_nxv32i32_idx(<vscale x 32 x i32> %v, i32 %idx) {
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: .LBB74_2:
-; CHECK-NEXT: addi sp, sp, -64
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: addi s0, sp, 64
+; CHECK-NEXT: addi sp, sp, -80
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -4
+; CHECK-NEXT: .cfi_offset s0, -8
+; CHECK-NEXT: addi s0, sp, 80
; CHECK-NEXT: .cfi_def_cfa s0, 0
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
@@ -934,8 +944,10 @@ define i32 @extractelt_nxv32i32_idx(<vscale x 32 x i32> %v, i32 %idx) {
; CHECK-NEXT: add a1, a2, a1
; CHECK-NEXT: vs8r.v v16, (a1)
; CHECK-NEXT: lw a0, 0(a0)
-; CHECK-NEXT: addi sp, s0, -64
-; CHECK-NEXT: addi sp, sp, 64
+; CHECK-NEXT: addi sp, s0, -80
+; CHECK-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; CHECK-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 80
; CHECK-NEXT: ret
%r = extractelement <vscale x 32 x i32> %v, i32 %idx
ret i32 %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
index 1570df6cfd07c..6eeb6a695d39c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
@@ -804,9 +804,13 @@ define i64 @extractelt_nxv16i64_0(<vscale x 16 x i64> %v) {
define i64 @extractelt_nxv16i64_neg1(<vscale x 16 x i64> %v) {
; CHECK-LABEL: extractelt_nxv16i64_neg1:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -64
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: addi s0, sp, 64
+; CHECK-NEXT: addi sp, sp, -80
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: addi s0, sp, 80
; CHECK-NEXT: .cfi_def_cfa s0, 0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
@@ -829,8 +833,10 @@ define i64 @extractelt_nxv16i64_neg1(<vscale x 16 x i64> %v) {
; CHECK-NEXT: slli a1, a2, 3
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: ld a0, 0(a0)
-; CHECK-NEXT: addi sp, s0, -64
-; CHECK-NEXT: addi sp, sp, 64
+; CHECK-NEXT: addi sp, s0, -80
+; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 80
; CHECK-NEXT: ret
%r = extractelement <vscale x 16 x i64> %v, i32 -1
ret i64 %r
@@ -857,9 +863,13 @@ define i64 @extractelt_nxv16i64_idx(<vscale x 16 x i64> %v, i32 zeroext %idx) {
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: .LBB74_2:
-; CHECK-NEXT: addi sp, sp, -64
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: addi s0, sp, 64
+; CHECK-NEXT: addi sp, sp, -80
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: addi s0, sp, 80
; CHECK-NEXT: .cfi_def_cfa s0, 0
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
@@ -873,8 +883,10 @@ define i64 @extractelt_nxv16i64_idx(<vscale x 16 x i64> %v, i32 zeroext %idx) {
; CHECK-NEXT: add a1, a2, a1
; CHECK-NEXT: vs8r.v v16, (a1)
; CHECK-NEXT: ld a0, 0(a0)
-; CHECK-NEXT: addi sp, s0, -64
-; CHECK-NEXT: addi sp, sp, 64
+; CHECK-NEXT: addi sp, s0, -80
+; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 80
; CHECK-NEXT: ret
%r = extractelement <vscale x 16 x i64> %v, i32 %idx
ret i64 %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
index 75c9a1fb2a173..6ed907f8e9106 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -1,13 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1
define <vscale x 8 x i32> @insert_nxv8i32_v2i32_0(<vscale x 8 x i32> %vec, <2 x i32>* %svp) {
; CHECK-LABEL: insert_nxv8i32_v2i32_0:
@@ -557,32 +557,71 @@ define void @insert_v2i64_nxv16i64_lo2(<2 x i64>* %psv, <vscale x 16 x i64>* %ou
; Check we don't mistakenly optimize this: we don't know whether this is
; inserted into the low or high split vector.
define void @insert_v2i64_nxv16i64_hi(<2 x i64>* %psv, <vscale x 16 x i64>* %out) {
-; CHECK-LABEL: insert_v2i64_nxv16i64_hi:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -64
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: addi s0, sp, 64
-; CHECK-NEXT: .cfi_def_cfa s0, 0
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 4
-; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: andi sp, sp, -64
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: addi a0, sp, 128
-; CHECK-NEXT: vse64.v v8, (a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: addi a2, sp, 64
-; CHECK-NEXT: add a3, a2, a0
-; CHECK-NEXT: vl8re64.v v8, (a3)
-; CHECK-NEXT: vl8re64.v v16, (a2)
-; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: vs8r.v v8, (a0)
-; CHECK-NEXT: vs8r.v v16, (a1)
-; CHECK-NEXT: addi sp, s0, -64
-; CHECK-NEXT: addi sp, sp, 64
-; CHECK-NEXT: ret
+; RV32-LABEL: insert_v2i64_nxv16i64_hi:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -80
+; RV32-NEXT: .cfi_def_cfa_offset 80
+; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 80
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 4
+; RV32-NEXT: sub sp, sp, a2
+; RV32-NEXT: andi sp, sp, -64
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: addi a0, sp, 128
+; RV32-NEXT: vse64.v v8, (a0)
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: addi a2, sp, 64
+; RV32-NEXT: add a3, a2, a0
+; RV32-NEXT: vl8re64.v v8, (a3)
+; RV32-NEXT: vl8re64.v v16, (a2)
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: vs8r.v v8, (a0)
+; RV32-NEXT: vs8r.v v16, (a1)
+; RV32-NEXT: addi sp, s0, -80
+; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 80
+; RV32-NEXT: ret
+;
+; RV64-LABEL: insert_v2i64_nxv16i64_hi:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -80
+; RV64-NEXT: .cfi_def_cfa_offset 80
+; RV64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 80
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 4
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: andi sp, sp, -64
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi a0, sp, 128
+; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: addi a2, sp, 64
+; RV64-NEXT: add a3, a2, a0
+; RV64-NEXT: vl8re64.v v8, (a3)
+; RV64-NEXT: vl8re64.v v16, (a2)
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: vs8r.v v8, (a0)
+; RV64-NEXT: vs8r.v v16, (a1)
+; RV64-NEXT: addi sp, s0, -80
+; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 80
+; RV64-NEXT: ret
%sv = load <2 x i64>, <2 x i64>* %psv
%v = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 8)
store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
index b59e5b3afd742..66fde7ad0a570 100644
--- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
@@ -1668,35 +1668,221 @@ define <vscale x 6 x i64> @reverse_nxv6i64(<vscale x 6 x i64> %a) {
}
define <vscale x 12 x i64> @reverse_nxv12i64(<vscale x 12 x i64> %a) {
-; CHECK-LABEL: reverse_nxv12i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -64
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: addi s0, sp, 64
-; CHECK-NEXT: .cfi_def_cfa s0, 0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: andi sp, sp, -64
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: addi a1, a0, -1
-; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; CHECK-NEXT: vid.v v24
-; CHECK-NEXT: vrsub.vx v24, v24, a1
-; CHECK-NEXT: vrgather.vv v0, v16, v24
-; CHECK-NEXT: vmv4r.v v16, v4
-; CHECK-NEXT: vrgather.vv v0, v8, v24
-; CHECK-NEXT: vmv4r.v v20, v0
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: addi a1, sp, 64
-; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: vs4r.v v4, (a0)
-; CHECK-NEXT: vs8r.v v16, (a1)
-; CHECK-NEXT: vl8re64.v v16, (a0)
-; CHECK-NEXT: vl8re64.v v8, (a1)
-; CHECK-NEXT: addi sp, s0, -64
-; CHECK-NEXT: addi sp, sp, 64
-; CHECK-NEXT: ret
+; RV32-BITS-UNKNOWN-LABEL: reverse_nxv12i64:
+; RV32-BITS-UNKNOWN: # %bb.0:
+; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -80
+; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 80
+; RV32-BITS-UNKNOWN-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; RV32-BITS-UNKNOWN-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; RV32-BITS-UNKNOWN-NEXT: .cfi_offset ra, -4
+; RV32-BITS-UNKNOWN-NEXT: .cfi_offset s0, -8
+; RV32-BITS-UNKNOWN-NEXT: addi s0, sp, 80
+; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 4
+; RV32-BITS-UNKNOWN-NEXT: sub sp, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: andi sp, sp, -64
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: addi a1, a0, -1
+; RV32-BITS-UNKNOWN-NEXT: vsetvli a2, zero, e64, m8, ta, ma
+; RV32-BITS-UNKNOWN-NEXT: vid.v v24
+; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v24, v24, a1
+; RV32-BITS-UNKNOWN-NEXT: vrgather.vv v0, v16, v24
+; RV32-BITS-UNKNOWN-NEXT: vmv4r.v v16, v4
+; RV32-BITS-UNKNOWN-NEXT: vrgather.vv v0, v8, v24
+; RV32-BITS-UNKNOWN-NEXT: vmv4r.v v20, v0
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 3
+; RV32-BITS-UNKNOWN-NEXT: addi a1, sp, 64
+; RV32-BITS-UNKNOWN-NEXT: add a0, a1, a0
+; RV32-BITS-UNKNOWN-NEXT: vs4r.v v4, (a0)
+; RV32-BITS-UNKNOWN-NEXT: vs8r.v v16, (a1)
+; RV32-BITS-UNKNOWN-NEXT: vl8re64.v v16, (a0)
+; RV32-BITS-UNKNOWN-NEXT: vl8re64.v v8, (a1)
+; RV32-BITS-UNKNOWN-NEXT: addi sp, s0, -80
+; RV32-BITS-UNKNOWN-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; RV32-BITS-UNKNOWN-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, 80
+; RV32-BITS-UNKNOWN-NEXT: ret
+;
+; RV32-BITS-256-LABEL: reverse_nxv12i64:
+; RV32-BITS-256: # %bb.0:
+; RV32-BITS-256-NEXT: addi sp, sp, -80
+; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 80
+; RV32-BITS-256-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; RV32-BITS-256-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; RV32-BITS-256-NEXT: .cfi_offset ra, -4
+; RV32-BITS-256-NEXT: .cfi_offset s0, -8
+; RV32-BITS-256-NEXT: addi s0, sp, 80
+; RV32-BITS-256-NEXT: .cfi_def_cfa s0, 0
+; RV32-BITS-256-NEXT: csrr a0, vlenb
+; RV32-BITS-256-NEXT: slli a0, a0, 4
+; RV32-BITS-256-NEXT: sub sp, sp, a0
+; RV32-BITS-256-NEXT: andi sp, sp, -64
+; RV32-BITS-256-NEXT: csrr a0, vlenb
+; RV32-BITS-256-NEXT: addi a1, a0, -1
+; RV32-BITS-256-NEXT: vsetvli a2, zero, e64, m8, ta, ma
+; RV32-BITS-256-NEXT: vid.v v24
+; RV32-BITS-256-NEXT: vrsub.vx v24, v24, a1
+; RV32-BITS-256-NEXT: vrgather.vv v0, v16, v24
+; RV32-BITS-256-NEXT: vmv4r.v v16, v4
+; RV32-BITS-256-NEXT: vrgather.vv v0, v8, v24
+; RV32-BITS-256-NEXT: vmv4r.v v20, v0
+; RV32-BITS-256-NEXT: slli a0, a0, 3
+; RV32-BITS-256-NEXT: addi a1, sp, 64
+; RV32-BITS-256-NEXT: add a0, a1, a0
+; RV32-BITS-256-NEXT: vs4r.v v4, (a0)
+; RV32-BITS-256-NEXT: vs8r.v v16, (a1)
+; RV32-BITS-256-NEXT: vl8re64.v v16, (a0)
+; RV32-BITS-256-NEXT: vl8re64.v v8, (a1)
+; RV32-BITS-256-NEXT: addi sp, s0, -80
+; RV32-BITS-256-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; RV32-BITS-256-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; RV32-BITS-256-NEXT: addi sp, sp, 80
+; RV32-BITS-256-NEXT: ret
+;
+; RV32-BITS-512-LABEL: reverse_nxv12i64:
+; RV32-BITS-512: # %bb.0:
+; RV32-BITS-512-NEXT: addi sp, sp, -80
+; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 80
+; RV32-BITS-512-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; RV32-BITS-512-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; RV32-BITS-512-NEXT: .cfi_offset ra, -4
+; RV32-BITS-512-NEXT: .cfi_offset s0, -8
+; RV32-BITS-512-NEXT: addi s0, sp, 80
+; RV32-BITS-512-NEXT: .cfi_def_cfa s0, 0
+; RV32-BITS-512-NEXT: csrr a0, vlenb
+; RV32-BITS-512-NEXT: slli a0, a0, 4
+; RV32-BITS-512-NEXT: sub sp, sp, a0
+; RV32-BITS-512-NEXT: andi sp, sp, -64
+; RV32-BITS-512-NEXT: csrr a0, vlenb
+; RV32-BITS-512-NEXT: addi a1, a0, -1
+; RV32-BITS-512-NEXT: vsetvli a2, zero, e64, m8, ta, ma
+; RV32-BITS-512-NEXT: vid.v v24
+; RV32-BITS-512-NEXT: vrsub.vx v24, v24, a1
+; RV32-BITS-512-NEXT: vrgather.vv v0, v16, v24
+; RV32-BITS-512-NEXT: vmv4r.v v16, v4
+; RV32-BITS-512-NEXT: vrgather.vv v0, v8, v24
+; RV32-BITS-512-NEXT: vmv4r.v v20, v0
+; RV32-BITS-512-NEXT: slli a0, a0, 3
+; RV32-BITS-512-NEXT: addi a1, sp, 64
+; RV32-BITS-512-NEXT: add a0, a1, a0
+; RV32-BITS-512-NEXT: vs4r.v v4, (a0)
+; RV32-BITS-512-NEXT: vs8r.v v16, (a1)
+; RV32-BITS-512-NEXT: vl8re64.v v16, (a0)
+; RV32-BITS-512-NEXT: vl8re64.v v8, (a1)
+; RV32-BITS-512-NEXT: addi sp, s0, -80
+; RV32-BITS-512-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; RV32-BITS-512-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; RV32-BITS-512-NEXT: addi sp, sp, 80
+; RV32-BITS-512-NEXT: ret
+;
+; RV64-BITS-UNKNOWN-LABEL: reverse_nxv12i64:
+; RV64-BITS-UNKNOWN: # %bb.0:
+; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, -80
+; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 80
+; RV64-BITS-UNKNOWN-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; RV64-BITS-UNKNOWN-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; RV64-BITS-UNKNOWN-NEXT: .cfi_offset ra, -8
+; RV64-BITS-UNKNOWN-NEXT: .cfi_offset s0, -16
+; RV64-BITS-UNKNOWN-NEXT: addi s0, sp, 80
+; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0
+; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 4
+; RV64-BITS-UNKNOWN-NEXT: sub sp, sp, a0
+; RV64-BITS-UNKNOWN-NEXT: andi sp, sp, -64
+; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV64-BITS-UNKNOWN-NEXT: addi a1, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: vsetvli a2, zero, e64, m8, ta, ma
+; RV64-BITS-UNKNOWN-NEXT: vid.v v24
+; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v24, v24, a1
+; RV64-BITS-UNKNOWN-NEXT: vrgather.vv v0, v16, v24
+; RV64-BITS-UNKNOWN-NEXT: vmv4r.v v16, v4
+; RV64-BITS-UNKNOWN-NEXT: vrgather.vv v0, v8, v24
+; RV64-BITS-UNKNOWN-NEXT: vmv4r.v v20, v0
+; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 3
+; RV64-BITS-UNKNOWN-NEXT: addi a1, sp, 64
+; RV64-BITS-UNKNOWN-NEXT: add a0, a1, a0
+; RV64-BITS-UNKNOWN-NEXT: vs4r.v v4, (a0)
+; RV64-BITS-UNKNOWN-NEXT: vs8r.v v16, (a1)
+; RV64-BITS-UNKNOWN-NEXT: vl8re64.v v16, (a0)
+; RV64-BITS-UNKNOWN-NEXT: vl8re64.v v8, (a1)
+; RV64-BITS-UNKNOWN-NEXT: addi sp, s0, -80
+; RV64-BITS-UNKNOWN-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; RV64-BITS-UNKNOWN-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, 80
+; RV64-BITS-UNKNOWN-NEXT: ret
+;
+; RV64-BITS-256-LABEL: reverse_nxv12i64:
+; RV64-BITS-256: # %bb.0:
+; RV64-BITS-256-NEXT: addi sp, sp, -80
+; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 80
+; RV64-BITS-256-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; RV64-BITS-256-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; RV64-BITS-256-NEXT: .cfi_offset ra, -8
+; RV64-BITS-256-NEXT: .cfi_offset s0, -16
+; RV64-BITS-256-NEXT: addi s0, sp, 80
+; RV64-BITS-256-NEXT: .cfi_def_cfa s0, 0
+; RV64-BITS-256-NEXT: csrr a0, vlenb
+; RV64-BITS-256-NEXT: slli a0, a0, 4
+; RV64-BITS-256-NEXT: sub sp, sp, a0
+; RV64-BITS-256-NEXT: andi sp, sp, -64
+; RV64-BITS-256-NEXT: csrr a0, vlenb
+; RV64-BITS-256-NEXT: addi a1, a0, -1
+; RV64-BITS-256-NEXT: vsetvli a2, zero, e64, m8, ta, ma
+; RV64-BITS-256-NEXT: vid.v v24
+; RV64-BITS-256-NEXT: vrsub.vx v24, v24, a1
+; RV64-BITS-256-NEXT: vrgather.vv v0, v16, v24
+; RV64-BITS-256-NEXT: vmv4r.v v16, v4
+; RV64-BITS-256-NEXT: vrgather.vv v0, v8, v24
+; RV64-BITS-256-NEXT: vmv4r.v v20, v0
+; RV64-BITS-256-NEXT: slli a0, a0, 3
+; RV64-BITS-256-NEXT: addi a1, sp, 64
+; RV64-BITS-256-NEXT: add a0, a1, a0
+; RV64-BITS-256-NEXT: vs4r.v v4, (a0)
+; RV64-BITS-256-NEXT: vs8r.v v16, (a1)
+; RV64-BITS-256-NEXT: vl8re64.v v16, (a0)
+; RV64-BITS-256-NEXT: vl8re64.v v8, (a1)
+; RV64-BITS-256-NEXT: addi sp, s0, -80
+; RV64-BITS-256-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; RV64-BITS-256-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; RV64-BITS-256-NEXT: addi sp, sp, 80
+; RV64-BITS-256-NEXT: ret
+;
+; RV64-BITS-512-LABEL: reverse_nxv12i64:
+; RV64-BITS-512: # %bb.0:
+; RV64-BITS-512-NEXT: addi sp, sp, -80
+; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 80
+; RV64-BITS-512-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; RV64-BITS-512-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; RV64-BITS-512-NEXT: .cfi_offset ra, -8
+; RV64-BITS-512-NEXT: .cfi_offset s0, -16
+; RV64-BITS-512-NEXT: addi s0, sp, 80
+; RV64-BITS-512-NEXT: .cfi_def_cfa s0, 0
+; RV64-BITS-512-NEXT: csrr a0, vlenb
+; RV64-BITS-512-NEXT: slli a0, a0, 4
+; RV64-BITS-512-NEXT: sub sp, sp, a0
+; RV64-BITS-512-NEXT: andi sp, sp, -64
+; RV64-BITS-512-NEXT: csrr a0, vlenb
+; RV64-BITS-512-NEXT: addi a1, a0, -1
+; RV64-BITS-512-NEXT: vsetvli a2, zero, e64, m8, ta, ma
+; RV64-BITS-512-NEXT: vid.v v24
+; RV64-BITS-512-NEXT: vrsub.vx v24, v24, a1
+; RV64-BITS-512-NEXT: vrgather.vv v0, v16, v24
+; RV64-BITS-512-NEXT: vmv4r.v v16, v4
+; RV64-BITS-512-NEXT: vrgather.vv v0, v8, v24
+; RV64-BITS-512-NEXT: vmv4r.v v20, v0
+; RV64-BITS-512-NEXT: slli a0, a0, 3
+; RV64-BITS-512-NEXT: addi a1, sp, 64
+; RV64-BITS-512-NEXT: add a0, a1, a0
+; RV64-BITS-512-NEXT: vs4r.v v4, (a0)
+; RV64-BITS-512-NEXT: vs8r.v v16, (a1)
+; RV64-BITS-512-NEXT: vl8re64.v v16, (a0)
+; RV64-BITS-512-NEXT: vl8re64.v v8, (a1)
+; RV64-BITS-512-NEXT: addi sp, s0, -80
+; RV64-BITS-512-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; RV64-BITS-512-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; RV64-BITS-512-NEXT: addi sp, sp, 80
+; RV64-BITS-512-NEXT: ret
%res = call <vscale x 12 x i64> @llvm.experimental.vector.reverse.nxv12i64(<vscale x 12 x i64> %a)
ret <vscale x 12 x i64> %res
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll
index ca249cc4ef2d0..27a5059e5a9eb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll
@@ -22,17 +22,22 @@ define <vscale x 16 x i32> @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5,
define <vscale x 16 x i32> @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, <vscale x 16 x i32> %x) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -80
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT: addi sp, sp, -96
+; CHECK-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 72(sp) # 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -8
-; CHECK-NEXT: addi s0, sp, 80
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: addi s0, sp, 96
; CHECK-NEXT: .cfi_def_cfa s0, 0
; CHECK-NEXT: csrr t0, vlenb
; CHECK-NEXT: slli t0, t0, 4
; CHECK-NEXT: sub sp, sp, t0
; CHECK-NEXT: andi sp, sp, -64
; CHECK-NEXT: mv s1, sp
+; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: addi t0, s1, 64
; CHECK-NEXT: vs8r.v v8, (t0)
; CHECK-NEXT: csrr t1, vlenb
@@ -44,9 +49,12 @@ define <vscale x 16 x i32> @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5,
; CHECK-NEXT: sd t1, 0(sp)
; CHECK-NEXT: vmv8r.v v16, v8
; CHECK-NEXT: call bar@plt
-; CHECK-NEXT: addi sp, s0, -80
-; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 80
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: addi sp, s0, -96
+; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 96
; CHECK-NEXT: ret
%ret = call <vscale x 16 x i32> @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, <vscale x 16 x i32> %x, <vscale x 16 x i32> %x, <vscale x 16 x i32> %x, <vscale x 16 x i32> %x)
ret <vscale x 16 x i32> %ret